BaseMVAValueMapProducer.h (PhysicsTools/PatAlgos)
Source listing extracted from the CMSSW reference documentation; note that the
extraction dropped some original source lines from the listing below.
1 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
2 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
3 
4 // -*- C++ -*-
5 //
6 // Package: PhysicsTools/PatAlgos
7 // Class: BaseMVAValueMapProducer
8 //
16 //
17 // Original Author: Andre Rizzi
18 // Created: Mon, 07 Sep 2017 09:18:03 GMT
19 //
20 //
21 
22 // system include files
23 #include <memory>
24 
25 // user include files
28 
31 
34 
35 #include "TMVA/Factory.h"
36 #include "TMVA/Reader.h"
37 
44 
50 
51 #include <string>
52 //
53 // class declaration
54 //
55 
56 class BaseMVACache {
57 public:
58  BaseMVACache(const std::string& model_path, const std::string& backend, const bool disableONNXGraphOpt) {
59  if (backend == "TF") {
62  } else if (backend == "ONNX") {
63  if (disableONNXGraphOpt) {
64  Ort::SessionOptions sess_opts;
66  sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
67  ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path, &sess_opts);
68  } else {
69  ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
70  }
71  }
72  }
74 
75  tensorflow::Session* getTFSession() const { return tf_session_; }
76  const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
77 
78 private:
79  std::shared_ptr<tensorflow::GraphDef> graph_;
80  tensorflow::Session* tf_session_ = nullptr;
81  std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
82 };
83 
84 template <typename T>
85 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
86 public:
88  : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
89  name_(iConfig.getParameter<std::string>("name")),
90  backend_(iConfig.getParameter<std::string>("backend")),
91  weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
92  tmva_(backend_ == "TMVA"),
93  tf_(backend_ == "TF"),
94  onnx_(backend_ == "ONNX"),
95  batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
96  if (tmva_) {
97  reader_ = new TMVA::Reader();
98  isClassifier_ = iConfig.getParameter<bool>("isClassifier");
99  }
100 
101  std::vector<edm::ParameterSet> const& varsPSet = iConfig.getParameter<std::vector<edm::ParameterSet>>("variables");
102  values_.resize(varsPSet.size());
103  size_t i = 0;
104  for (const edm::ParameterSet& var_pset : varsPSet) {
105  const std::string& vname = var_pset.getParameter<std::string>("name");
106  if (var_pset.existsAs<std::string>("expr"))
107  funcs_.emplace_back(
108  std::pair<std::string, StringObjectFunction<T, true>>(vname, var_pset.getParameter<std::string>("expr")));
109  positions_[vname] = i;
110  if (tmva_)
111  reader_->AddVariable(vname, (&values_.front()) + i);
112  i++;
113  }
114 
115  if (tmva_) {
117  }
118  if (tf_ || onnx_) {
119  inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
120  outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
121  output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
122  for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
123  output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
124  }
125  }
126 
127  if (tmva_)
128  produces<edm::ValueMap<float>>();
129  else {
130  for (const auto& n : output_names_) {
131  produces<edm::ValueMap<float>>(n);
132  }
133  }
134  }
136 
137  void setValue(const std::string var, float val) {
138  if (positions_.find(var) != positions_.end())
140  }
141 
142  static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
143  static void globalEndJob(const BaseMVACache* cache);
144 
146  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
147 
148 private:
149  void beginStream(edm::StreamID) override{};
150  void produce(edm::Event&, const edm::EventSetup&) override;
151  void endStream() override{};
152 
155  virtual void fillAdditionalVariables(const T&) {}
156 
158  std::map<std::string, size_t> positions_;
159  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
160  std::vector<float> values_;
161  TMVA::Reader* reader_;
162 
167  bool tmva_;
168  bool tf_;
169  bool onnx_;
173  std::vector<std::string> output_names_;
174  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
175 };
176 
177 template <typename T>
180  iEvent.getByToken(src_, src);
181  readAdditionalCollections(iEvent, iSetup);
182  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
183  for (auto& v : mvaOut)
184  v.reserve(src->size());
185 
186  if (batch_eval_) {
187  if (!src->empty()) {
188  std::vector<float> data;
189  data.reserve(src->size() * positions_.size());
190  for (auto const& o : *src) {
191  for (auto const& p : funcs_) {
192  setValue(p.first, p.second(o));
193  }
194  fillAdditionalVariables(o);
195  data.insert(data.end(), values_.begin(), values_.end());
196  }
197 
198  std::vector<float> outputs;
199  if (tf_) {
200  tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
201  tensorflow::NamedTensorList input_tensors;
202  input_tensors.resize(1);
203  input_tensors[0] =
204  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
205  for (unsigned i = 0; i < data.size(); ++i) {
206  input_tensors[0].second.flat<float>()(i) = data[i];
207  }
208  std::vector<tensorflow::Tensor> output_tensors;
209  tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
210  for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
211  outputs.push_back(output_tensors.at(0).flat<float>()(i));
212  }
213  } else if (onnx_) {
215  outputs =
216  globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
217  }
218 
219  const unsigned outdim = outputs.size() / src->size();
220  for (unsigned i = 0; i < src->size(); ++i) {
221  std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
222  for (size_t k = 0; k < output_names_.size(); k++) {
223  mvaOut[k].push_back(output_formulas_[k](tmpOut));
224  }
225  }
226  }
227  } else {
228  for (auto const& o : *src) {
229  for (auto const& p : funcs_) {
230  setValue(p.first, p.second(o));
231  }
232  fillAdditionalVariables(o);
233  if (tmva_) {
234  mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
235  } else {
236  std::vector<float> tmpOut;
237  if (tf_) {
238  //currently support only one input sensor to reuse the TMVA like config
239  tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
240  tensorflow::NamedTensorList input_tensors;
241  input_tensors.resize(1);
242  input_tensors[0] =
243  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
244  for (size_t j = 0; j < values_.size(); j++) {
245  input_tensors[0].second.matrix<float>()(0, j) = values_[j];
246  }
247  std::vector<tensorflow::Tensor> outputs;
248  tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
249  for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
250  tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
251  } else if (onnx_) {
252  cms::Ort::FloatArrays inputs{values_};
253  tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
254  }
255  for (size_t k = 0; k < output_names_.size(); k++)
256  mvaOut[k].push_back(output_formulas_[k](tmpOut));
257  }
258  }
259  }
260 
261  size_t k = 0;
262  for (auto& m : mvaOut) {
263  std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
265  filler.insert(src, m.begin(), m.end());
266  filler.fill();
267  iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
268  k++;
269  }
270 }
271 
272 template <typename T>
274  std::string backend = cfg.getParameter<std::string>("backend");
275  bool disableONNXGraphOpt = false;
276  if (backend == "ONNX")
277  disableONNXGraphOpt = cfg.getParameter<bool>("disableONNXGraphOpt");
278  return std::make_unique<BaseMVACache>(
279  cfg.getParameter<edm::FileInPath>("weightFile").fullPath(), backend, disableONNXGraphOpt);
280 }
281 
282 template <typename T>
284 
285 template <typename T>
288  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
289 
290  desc.add<std::string>("name")->setComment("output score variable name");
291  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file, or TF/ONNX model file");
292  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
293 
295  variable.add<std::string>("name")->setComment("name of the variable, either created by expr, or internally by code");
296  variable.addOptional<std::string>("expr")->setComment(
297  "a function to define the content of the model input, absence of it means the leaf is computed internally");
298  variable.setComment("a PSet to define an entry to the ML model");
299  desc.addVPSet("variables", variable);
300 
302  "inputTensorName", "", true, edm::Comment("Name of tensorflow input tensor in the model"));
304  "outputTensorName", "", true, edm::Comment("Name of tensorflow output tensor in the model"));
306  "outputNames",
307  std::vector<std::string>(),
308  true,
309  edm::Comment("Names of the output values to be used in the output valuemap"));
311  "outputFormulas",
312  std::vector<std::string>(),
313  true,
314  edm::Comment("Formulas to be used to post process the output"));
316  "disableONNXGraphOpt", false, true, edm::Comment("Disable ONNX runtime graph optimization"));
317 
319  "backend", "TMVA", true, edm::Comment("the backend to evaluate the model:tmva, tf or onnx")),
321  "isClassifier", true, true, edm::Comment("a classification or regression")) or
322  "TF" >> (itn and otn and on and of) or "ONNX" >> (itn and otn and on and of and dog));
323 
324  return desc;
325 }
326 
327 template <typename T>
329  edm::ParameterSetDescription desc = getDescription();
331  if (typeid(T) == typeid(pat::Jet))
332  modname += "Jet";
333  else if (typeid(T) == typeid(pat::Muon))
334  modname += "Muon";
335  else if (typeid(T) == typeid(pat::Electron))
336  modname += "Ele";
337  modname += "BaseMVAValueMapProducer";
338  descriptions.add(modname, desc);
339 }
340 
341 #endif
static edm::ParameterSetDescription getDescription()
virtual void fillAdditionalVariables(const T &)
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:31
T getParameter(std::string const &) const
Definition: ParameterSet.h:307
std::vector< StringObjectFunction< std::vector< float > > > output_formulas_
::Ort::SessionOptions defaultSessionOptions(Backend backend=Backend::cpu)
Definition: ONNXRuntime.cc:76
static std::unique_ptr< BaseMVACache > initializeGlobalCache(const edm::ParameterSet &cfg)
std::shared_ptr< tensorflow::GraphDef > graph_
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:120
static void globalEndJob(const BaseMVACache *cache)
void produce(edm::Event &, const edm::EventSetup &) override
std::vector< std::vector< float > > FloatArrays
Definition: ONNXRuntime.h:23
bool setValue(Container &, const reco::JetBaseRef &, const JetExtendedData &)
associate jet with value. Returns false and associate nothing if jet is already associated ...
const cms::Ort::ONNXRuntime & getONNXSession() const
void setValue(const std::string var, float val)
std::pair< std::string, Tensor > NamedTensor
Definition: TensorFlow.h:30
int iEvent
Definition: GenABIO.cc:224
std::vector< std::pair< std::string, StringObjectFunction< T, true > > > funcs_
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:272
bool closeSession(Session *&session)
Definition: TensorFlow.cc:234
tensorflow::Session * getTFSession() const
std::unique_ptr< cms::Ort::ONNXRuntime > ort_
Session * createSession()
Definition: TensorFlow.cc:137
edm::EDGetTokenT< edm::View< T > > src_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Analysis-level electron class.
Definition: Electron.h:51
std::map< std::string, size_t > positions_
Analysis-level calorimeter jet class.
Definition: Jet.h:77
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TMVA::IMethod * loadTMVAWeights(TMVA::Reader *reader, const std::string &method, const std::string &weightFile, bool verbose=false)
tensorflow::Session * tf_session_
deadvectors [0] push_back({0.0175431, 0.538005, 6.80997, 13.29})
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
def cache(function)
Definition: utilities.py:3
void beginStream(edm::StreamID) override
BaseMVAValueMapProducer(const edm::ParameterSet &iConfig, const BaseMVACache *cache)
long double T
uint32_t dimension(pat::CandKinResolution::Parametrization parametrization)
Returns the number of free parameters in a parametrization (3 or 4)
BaseMVACache(const std::string &model_path, const std::string &backend, const bool disableONNXGraphOpt)
Analysis-level muon class.
Definition: Muon.h:51
def move(src, dest)
Definition: eostools.py:511
std::vector< std::string > output_names_
virtual void readAdditionalCollections(edm::Event &, const edm::EventSetup &)
to be implemented in derived classes, filling values for additional variables