
BaseMVAValueMapProducer.h
#ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
#define PhysicsTools_PatAlgos_BaseMVAValueMapProducer

// -*- C++ -*-
//
// Package:    PhysicsTools/PatAlgos
// Class:      BaseMVAValueMapProducer
//
//
// Original Author:  Andre Rizzi
//         Created:  Mon, 07 Sep 2017 09:18:03 GMT
//
//

// system include files
#include <memory>

// user include files
// (the original listing elided the user include lines; the set below covers the
//  framework, PAT, TMVA, TensorFlow and ONNXRuntime headers used by the code in this file)
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/MakerMacros.h"

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/Utilities/interface/Exception.h"

#include "TMVA/Factory.h"
#include "TMVA/Reader.h"

#include "CommonTools/MVAUtils/interface/TMVAZipReader.h"
#include "CommonTools/Utils/interface/StringObjectFunction.h"
#include "DataFormats/Common/interface/ValueMap.h"
#include "DataFormats/PatCandidates/interface/Jet.h"
#include "DataFormats/PatCandidates/interface/Muon.h"
#include "DataFormats/PatCandidates/interface/Electron.h"

#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"

#include <string>
//
// class declaration
//
class BaseMVACache {
public:
  BaseMVACache(const std::string& model_path, const std::string& backend, const bool disableONNXGraphOpt) {
    if (backend == "TF") {
      // load the TF graph once and open a session on it; both are shared through the global cache
      graph_.reset(tensorflow::loadGraphDef(model_path));
      tf_session_ = tensorflow::createSession(graph_.get());
    } else if (backend == "ONNX") {
      if (disableONNXGraphOpt) {
        Ort::SessionOptions sess_opts;
        sess_opts = cms::Ort::ONNXRuntime::defaultSessionOptions();
        sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
        ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path, &sess_opts);
      } else {
        ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
      }
    }
  }
  ~BaseMVACache() { tensorflow::closeSession(tf_session_); }

  tensorflow::Session* getTFSession() const { return tf_session_; }
  const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }

private:
  std::shared_ptr<tensorflow::GraphDef> graph_;
  tensorflow::Session* tf_session_ = nullptr;
  std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
};
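For orientation, the sketch below shows how the cache is meant to be driven once it has been built by initializeGlobalCache(); it is not part of this header, and the model path, tensor names and input values are placeholders assuming a single-input, single-output ONNX model.

// Illustrative sketch only (not part of this header); placeholder model path and tensor names.
float exampleOnnxScore() {
  BaseMVACache cache("model.onnx", "ONNX", /*disableONNXGraphOpt=*/false);
  cms::Ort::FloatArrays inputs{{0.1f, 0.2f, 0.3f}};  // one flat input tensor for a single candidate
  // same run(input_names, input_values, input_shapes, output_names) pattern as used in produce() below
  std::vector<float> scores = cache.getONNXSession().run({"input"}, inputs, {}, {"output"})[0];
  return scores.empty() ? 0.f : scores.front();
}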
template <typename T>
class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
public:
  explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
      : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
        variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
        name_(iConfig.getParameter<std::string>("name")),
        backend_(iConfig.getParameter<std::string>("backend")),
        weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
        isClassifier_(iConfig.getParameter<bool>("isClassifier")),
        tmva_(backend_ == "TMVA"),
        tf_(backend_ == "TF"),
        onnx_(backend_ == "ONNX"),
        batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
    if (!(tmva_ || tf_ || onnx_)) {
      throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
    }

    if (tmva_)
      reader_ = new TMVA::Reader();
    edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
    for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
      // each configured variable is a string formula evaluated on the input object
      funcs_.emplace_back(
          std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
    }

    values_.resize(variablesOrder_.size());
    size_t i = 0;
    for (const auto& v : variablesOrder_) {
      positions_[v] = i;
      if (tmva_)
        reader_->AddVariable(v, (&values_.front()) + i);
      i++;
    }
    // reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
    if (tmva_) {
      reco::details::loadTMVAWeights(reader_, name_, weightfilename_);
    }
    if (tf_ || onnx_) {
      inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
      outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
      output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
      for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
        output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
      }
    }

    if (tmva_)
      produces<edm::ValueMap<float>>();
    else {
      for (const auto& n : output_names_) {
        produces<edm::ValueMap<float>>(n);
      }
    }
  }
  ~BaseMVAValueMapProducer() override {}

  void setValue(const std::string var, float val) {
    if (positions_.find(var) != positions_.end())
      values_[positions_[var]] = val;
  }

  static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
  static void globalEndJob(const BaseMVACache* cache);

  static edm::ParameterSetDescription getDescription();
  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

private:
  void beginStream(edm::StreamID) override{};
  void produce(edm::Event&, const edm::EventSetup&) override;
  void endStream() override{};

  ///to be implemented in derived classes, filling values for additional variables
  virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&) {}
  virtual void fillAdditionalVariables(const T&) {}

  edm::EDGetTokenT<edm::View<T>> src_;
  std::map<std::string, size_t> positions_;
  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
  std::vector<std::string> variablesOrder_;
  std::vector<float> values_;
  TMVA::Reader* reader_;

  std::string name_;
  std::string backend_;
  std::string weightfilename_;
  bool isClassifier_;
  bool tmva_;
  bool tf_;
  bool onnx_;
  bool batch_eval_;
  std::string inputTensorName_;
  std::string outputTensorName_;
  std::vector<std::string> output_names_;
  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
};
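The readAdditionalCollections() and fillAdditionalVariables() hooks are the extension points for derived producers that need inputs which cannot be expressed as a StringObjectFunction of the candidate alone. A minimal, hypothetical sketch (the class name, the "extraVar" variable and the computed quantity are illustrative only and not part of this package):

// Hypothetical derived producer; "extraVar" would have to appear in variablesOrder/variables.
template <typename T>
class ExampleMVAValueMapProducer : public BaseMVAValueMapProducer<T> {
public:
  ExampleMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
      : BaseMVAValueMapProducer<T>(iConfig, cache) {}

private:
  void fillAdditionalVariables(const T& obj) override {
    // quantities not expressible as a string formula can be injected by hand
    this->setValue("extraVar", obj.pt());  // placeholder computation
  }
};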
template <typename T>
void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
  edm::Handle<edm::View<T>> src;
  iEvent.getByToken(src_, src);
  readAdditionalCollections(iEvent, iSetup);
  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
  for (auto& v : mvaOut)
    v.reserve(src->size());

  if (batch_eval_) {
    // batch evaluation: flatten the inputs of all candidates and run a single inference call
    if (!src->empty()) {
      std::vector<float> data;
      data.reserve(src->size() * positions_.size());
      for (auto const& o : *src) {
        for (auto const& p : funcs_) {
          setValue(p.first, p.second(o));
        }
        fillAdditionalVariables(o);
        data.insert(data.end(), values_.begin(), values_.end());
      }

      std::vector<float> outputs;
      if (tf_) {
        tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
        tensorflow::NamedTensorList input_tensors;
        input_tensors.resize(1);
        input_tensors[0] =
            tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
        for (unsigned i = 0; i < data.size(); ++i) {
          input_tensors[0].second.flat<float>()(i) = data[i];
        }
        std::vector<tensorflow::Tensor> output_tensors;
        tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
        for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
          outputs.push_back(output_tensors.at(0).flat<float>()(i));
        }
      } else if (onnx_) {
        cms::Ort::FloatArrays inputs{data};
        outputs =
            globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
      }

      const unsigned outdim = outputs.size() / src->size();
      for (unsigned i = 0; i < src->size(); ++i) {
        std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
        for (size_t k = 0; k < output_names_.size(); k++) {
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
        }
      }
    }
  } else {
    // per-candidate evaluation
    for (auto const& o : *src) {
      for (auto const& p : funcs_) {
        setValue(p.first, p.second(o));
      }
      fillAdditionalVariables(o);
      if (tmva_) {
        mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
      } else {
        std::vector<float> tmpOut;
        if (tf_) {
          //currently support only one input tensor to reuse the TMVA-like config
          tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
          tensorflow::NamedTensorList input_tensors;
          input_tensors.resize(1);
          input_tensors[0] =
              tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
          for (size_t j = 0; j < values_.size(); j++) {
            input_tensors[0].second.matrix<float>()(0, j) = values_[j];
          }
          std::vector<tensorflow::Tensor> outputs;
          tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
          for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
            tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
        } else if (onnx_) {
          cms::Ort::FloatArrays inputs{values_};
          tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
        }
        for (size_t k = 0; k < output_names_.size(); k++)
          mvaOut[k].push_back(output_formulas_[k](tmpOut));
      }
    }
  }

  // store one ValueMap per output (TMVA has a single, unlabelled output)
  size_t k = 0;
  for (auto& m : mvaOut) {
    std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
    edm::ValueMap<float>::Filler filler(*mvaV);
    filler.insert(src, m.begin(), m.end());
    filler.fill();
    iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
    k++;
  }
}
template <typename T>
std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
  return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
                                        cfg.getParameter<std::string>("backend"),
                                        cfg.getParameter<bool>("disableONNXGraphOpt"));
}
template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}
template <typename T>
edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
  edm::ParameterSetDescription desc;
  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
  desc.add<std::string>("name")->setComment("output score variable name");
  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
  edm::ParameterSetDescription variables;
  variables.setAllowAnything();
  desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
  desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
      ->setComment("Names of the output values to be used in the output valuemap");
  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
      ->setComment("Formulas to be used to post process the output");
  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
  desc.add<bool>("disableONNXGraphOpt", false)->setComment("Disable ONNX runtime graph optimization");

  return desc;
}
template <typename T>
void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription desc = getDescription();
  std::string modname;
  if (typeid(T) == typeid(pat::Jet))
    modname += "Jet";
  else if (typeid(T) == typeid(pat::Muon))
    modname += "Muon";
  else if (typeid(T) == typeid(pat::Electron))
    modname += "Ele";
  modname += "BaseMVAValueMapProducer";
  descriptions.add(modname, desc);
}
#endif
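Concrete modules are obtained by instantiating the template for a specific PAT type and registering the instantiation with the plugin system. The sketch below shows such a plugin translation unit; the include path and typedef names follow the naming used in fillDescriptions above but are assumptions, not a copy of the actual plugin file.

// Sketch of a plugin source file (not part of this header); path and names are assumed.
#include "PhysicsTools/PatAlgos/interface/BaseMVAValueMapProducer.h"

typedef BaseMVAValueMapProducer<pat::Jet> JetBaseMVAValueMapProducer;
typedef BaseMVAValueMapProducer<pat::Muon> MuonBaseMVAValueMapProducer;
typedef BaseMVAValueMapProducer<pat::Electron> EleBaseMVAValueMapProducer;

DEFINE_FWK_MODULE(JetBaseMVAValueMapProducer);
DEFINE_FWK_MODULE(MuonBaseMVAValueMapProducer);
DEFINE_FWK_MODULE(EleBaseMVAValueMapProducer);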