CMS 3D CMS Logo

BaseMVAValueMapProducer.h
Go to the documentation of this file.
1 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
2 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
3 
4 // -*- C++ -*-
5 //
6 // Package: PhysicsTools/PatAlgos
7 // Class: BaseMVAValueMapProducer
8 //
16 //
17 // Original Author: Andre Rizzi
18 // Created: Mon, 07 Sep 2017 09:18:03 GMT
19 //
20 //
21 
22 // system include files
23 #include <memory>
24 
25 // user include files
28 
31 
34 
35 #include "TMVA/Factory.h"
36 #include "TMVA/Reader.h"
37 
44 
50 
51 #include <string>
52 //
53 // class declaration
54 //
55 
// BaseMVAValueMapProducer<T>: producer that evaluates an MVA (TMVA, TensorFlow or ONNX
// backend, selected by the "backend" string) for every object of an edm::View<T> and
// publishes the scores as edm::ValueMap<float> products.
//
// NOTE(review): this listing omits several source lines (listing lines 57, 88, 90-91, 93,
// 115, 122, 130-131, 134, 142, 145-148 and 152-154). The class declaration line itself and
// the declarations of src_, name_, backend_, weightfilename_, isClassifier_, batch_eval_,
// inputTensorName_ and outputTensorName_ are therefore not visible here even though the
// constructor uses them.
56 template <typename T>
58 public:
    // Constructor: reads the module configuration, prepares the input-variable bookkeeping,
    // sets up the selected backend and declares the output products.
    // Throws cms::Exception("ConfigError") when "backend" is none of "TMVA", "TF", "ONNX".
    // NOTE(review): members are initialised in declaration order, not in the order written
    // below; tmva_/tf_/onnx_ are computed from backend_, whose declaration is not visible in
    // this listing — confirm it is declared before them.
59  explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig)
60  : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
61  variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
62  name_(iConfig.getParameter<std::string>("name")),
63  backend_(iConfig.getParameter<std::string>("backend")),
64  weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
65  isClassifier_(iConfig.getParameter<bool>("isClassifier")),
66  tmva_(backend_ == "TMVA"),
67  tf_(backend_ == "TF"),
68  onnx_(backend_ == "ONNX"),
69  batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
    // NOTE(review): raw owning `new`; no matching delete is visible in this listing
    // (a destructor may sit on the omitted listing line 115) — confirm ownership.
70  if (tmva_)
71  reader_ = new TMVA::Reader();
    // Every string-typed entry of the "variables" PSet becomes a (name, expression) pair;
    // the expression string is turned into a StringObjectFunction evaluated on T.
72  edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
73  for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
74  funcs_.emplace_back(
75  std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
76  }
77 
    // values_ holds one float per entry of variablesOrder_, in that order; positions_ maps a
    // variable name to its index in values_. values_ is sized before the loop, and for TMVA
    // the reader is handed the address of element i.
    // NOTE(review): presumably the reader reads its inputs through these pointers at
    // evaluation time, so values_ must not be reallocated afterwards — confirm against the
    // TMVA::Reader documentation.
78  values_.resize(variablesOrder_.size());
79  size_t i = 0;
80  for (const auto& v : variablesOrder_) {
81  positions_[v] = i;
82  if (tmva_)
83  reader_->AddVariable(v, (&values_.front()) + i);
84  i++;
85  }
86  // reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
    // Backend-specific model loading. The bodies of the TMVA and TF branches are among the
    // lines omitted from this listing (listing lines 88, 90-91, 93); only the nThreads read
    // and the ONNX runtime construction from the weight file are visible.
87  if (tmva_) {
89  } else if (tf_) {
92  size_t nThreads = iConfig.getParameter<unsigned int>("nThreads");
94  } else if (onnx_) {
95  ort_ = std::make_unique<cms::Ort::ONNXRuntime>(weightfilename_);
96  } else {
97  throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
98  }
    // Tensor names, output labels and post-processing formulas are read only for TF/ONNX;
    // for TMVA output_names_ and output_formulas_ stay empty.
    // NOTE(review): no check that outputFormulas and outputNames have the same length is
    // visible here, although produce() indexes output_formulas_ by output name index.
99  if (tf_ || onnx_) {
100  inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
101  outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
102  output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
103  for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
104  output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
105  }
106  }
    // Product declaration: TMVA publishes a single unlabeled ValueMap<float>; TF/ONNX
    // publish one ValueMap<float> per entry of output_names_, labeled with that name.
107  if (tmva_)
108  produces<edm::ValueMap<float>>();
109  else {
110  for (const auto& n : output_names_) {
111  produces<edm::ValueMap<float>>(n);
112  }
113  }
114  }
116 
    // Stores `val` in the values_ slot registered for variable `var`.
    // Names that are not in positions_ (i.e. not listed in variablesOrder) are silently
    // ignored.
    // NOTE(review): `var` is taken by value (one string copy per call) and the map is
    // searched twice (find, then operator[]).
117  void setValue(const std::string var, float val) {
118  if (positions_.find(var) != positions_.end())
119  values_[positions_[var]] = val;
120  }
121 
    // Registers the module's configuration description (defined out of class).
123  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
124 
125 private:
    // Stream hooks: begin/end are intentionally empty; produce() does the per-event work.
126  void beginStream(edm::StreamID) override{};
127  void produce(edm::Event&, const edm::EventSetup&) override;
128  void endStream() override{};
129 
    // Extension point with an empty default body, taking the object being processed.
    // NOTE(review): the call site is not visible in this listing — presumably derived
    // classes override it to call setValue() for variables that cannot be written as string
    // expressions; confirm.
132  virtual void fillAdditionalVariables(const T&) {}
133 
    // variable name -> index into values_
135  std::map<std::string, size_t> positions_;
    // (variable name, expression evaluated on T) pairs built from the "variables" PSet
136  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
    // ordered list of MVA input variable names; defines the layout of values_
137  std::vector<std::string> variablesOrder_;
    // current input vector, refilled for every object
138  std::vector<float> values_;
    // backend handles; which one is set up depends on the "backend" parameter
139  TMVA::Reader* reader_;
140  tensorflow::GraphDef* graph_;
141  tensorflow::Session* session_;
143  std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
144 
    // backend selection flags derived from the "backend" string
149  bool tmva_;
150  bool tf_;
151  bool onnx_;
    // TF/ONNX only: product labels and the formulas applied to the raw network output
155  std::vector<std::string> output_names_;
156  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
157 };
158 
// produce(): evaluates the MVA for every object of the input collection and puts one
// edm::ValueMap<float> per output into the event.
// NOTE(review): the function signature (listing lines 160-161) and listing lines 176, 198,
// 215, 235 and 248 are omitted from this listing; `src`, `inputs` and `filler` are
// therefore used below without a visible declaration.
159 template <typename T>
162  iEvent.getByToken(src_, src);
163  readAdditionalCollections(iEvent, iSetup);
    // One score vector per output: a single one for TMVA, one per output name otherwise.
164  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
165  for (auto& v : mvaOut)
166  v.reserve(src->size());
167 
    // Batch mode: collect the inputs of all objects into one flat row-major buffer
    // (one row of positions_.size() floats per object) and run the network once.
    // Skipped entirely for an empty collection, which also protects the division below.
    // NOTE(review): only the tf_ and onnx_ cases are handled in this branch. With the TMVA
    // backend `outputs` stays empty and, since output_names_ is empty for TMVA, nothing is
    // appended to mvaOut[0]; the value map is then filled from an empty range — confirm
    // whether batch_eval with TMVA is meant to be rejected at configuration time.
168  if (batch_eval_) {
169  if (!src->empty()) {
170  std::vector<float> data;
171  data.reserve(src->size() * positions_.size());
172  for (auto const& o : *src) {
    // Evaluate every configured expression on the object and store it in values_.
173  for (auto const& p : funcs_) {
174  setValue(p.first, p.second(o));
175  }
177  data.insert(data.end(), values_.begin(), values_.end());
178  }
179 
180  std::vector<float> outputs;
181  if (tf_) {
182  // currently supports only one input tensor, to reuse the TMVA-like config
183  tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
184  tensorflow::NamedTensorList input_tensors;
185  input_tensors.resize(1);
186  input_tensors[0] =
187  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
188 
    // Copy the flat buffer element by element into the input tensor.
189  for (unsigned i = 0; i < data.size(); ++i) {
190  input_tensors[0].second.flat<float>()(i) = data[i];
191  }
192  std::vector<tensorflow::Tensor> output_tensors;
193  tensorflow::run(session_, input_tensors, {outputTensorName_}, &output_tensors);
    // Flatten the first (only requested) output tensor into `outputs`.
194  for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
195  outputs.push_back(output_tensors.at(0).flat<float>()(i));
196  }
197  } else if (onnx_) {
    // `inputs` is declared on the omitted listing line 198.
199  outputs = ort_->run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
200  }
201 
    // Split the flat network output into equal per-object slices of `outdim` values and
    // apply each output formula to the slice of object i.
202  const unsigned outdim = outputs.size() / src->size();
203  for (unsigned i = 0; i < src->size(); ++i) {
204  std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
205  for (size_t k = 0; k < output_names_.size(); k++) {
206  mvaOut[k].push_back(output_formulas_[k](tmpOut));
207  }
208  }
209  }
210  } else {
    // Per-object mode: fill values_ and evaluate the backend once per object.
211  for (auto const& o : *src) {
212  for (auto const& p : funcs_) {
213  setValue(p.first, p.second(o));
214  }
216  if (tmva_) {
    // Classifier: EvaluateMVA; otherwise the first element of EvaluateRegression.
217  mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
218  } else {
219  std::vector<float> tmpOut;
220  if (tf_) {
221  // currently supports only one input tensor, to reuse the TMVA-like config
222  tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
223  tensorflow::NamedTensorList input_tensors;
224  input_tensors.resize(1);
225  input_tensors[0] =
226  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
    // Single-row input: copy values_ into row 0 of the tensor.
227  for (size_t j = 0; j < values_.size(); j++) {
228  input_tensors[0].second.matrix<float>()(0, j) = values_[j];
229  }
230  std::vector<tensorflow::Tensor> outputs;
231  tensorflow::run(session_, input_tensors, {outputTensorName_}, &outputs);
    // Read row 0 of the first output tensor, one value per column.
232  for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
233  tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
234  } else if (onnx_) {
    // `inputs` is declared on the omitted listing line 235.
236  tmpOut = ort_->run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
237  }
238 
    // Post-process the raw network output into one score per output name.
239  for (size_t k = 0; k < output_names_.size(); k++)
240  mvaOut[k].push_back(output_formulas_[k](tmpOut));
241  }
242  }
243  }
244 
    // Publish each score vector as a ValueMap keyed on the input collection: unlabeled for
    // TMVA, labeled with the matching output name otherwise. `filler` is declared on the
    // omitted listing line 248.
245  size_t k = 0;
246  for (auto& m : mvaOut) {
247  std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
249  filler.insert(src, m.begin(), m.end());
250  filler.fill();
251  iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
252  k++;
253  }
254 }
255 
// getDescription(): builds and returns the parameter-set description shared by all
// instantiations of the producer (parameter names, defaults and help comments).
// NOTE(review): the signature lines (listing 257-258, where `desc` is presumably declared)
// and listing line 263 (where `variables` is presumably declared) are omitted from this
// listing.
256 template <typename T>
    // Required parameters (no default given).
259  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
260  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
    // NOTE(review): the constructor stores "name" in name_, which produce() passes to
    // EvaluateMVA/EvaluateRegression — it looks like the booked TMVA method name rather than
    // an output name; confirm the wording of this comment.
261  desc.add<std::string>("name")->setComment("output score variable name");
262  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
    // The "variables" PSet is free-form: any label/value pair is accepted.
264  variables.setAllowAnything();
265  desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
    // NOTE(review): described as an xml file, but the constructor also passes this path to
    // the ONNX runtime when backend == "ONNX".
266  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
    // Optional parameters with defaults; the backend defaults to TMVA.
267  desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
    // The tensor names are read for both the TF and the ONNX backend in the constructor.
268  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
269  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
270  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
271  ->setComment("Names of the output values to be used in the output valuemap");
272  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
273  ->setComment("Formulas to be used to post process the output");
274  desc.add<unsigned int>("nThreads", 1)->setComment("number of threads");
    // NOTE(review): "singleThreadPool" and "disableONNXGraphOpt" are declared here but are
    // not read in the constructor lines visible in this listing — they may be consumed on
    // omitted lines; confirm.
275  desc.add<std::string>("singleThreadPool", "no_threads");
276  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
277  desc.add<bool>("disableONNXGraphOpt", false)->setComment("Disable ONNX runtime graph optimization");
278 
279  return desc;
280 }
281 
// fillDescriptions(): registers the parameter description under a module name that depends
// on the object type T: "Jet", "Muon" or "Ele" is prepended to "BaseMVAValueMapProducer" for
// pat::Jet, pat::Muon and pat::Electron respectively; any other T gets no prefix.
// NOTE(review): the signature lines (listing 283-284) are omitted from this listing; `desc`
// is presumably obtained there from getDescription() — confirm.
282 template <typename T>
285  std::string modname;
    // Exact-type comparison via typeid: types derived from these classes get no prefix.
286  if (typeid(T) == typeid(pat::Jet))
287  modname += "Jet";
288  else if (typeid(T) == typeid(pat::Muon))
289  modname += "Muon";
290  else if (typeid(T) == typeid(pat::Electron))
291  modname += "Ele";
292  modname += "BaseMVAValueMapProducer";
293  descriptions.add(modname, desc);
294 }
295 
296 #endif
Session * createSession(SessionOptions &sessionOptions)
Definition: TensorFlow.cc:87
static edm::ParameterSetDescription getDescription()
T getParameter(std::string const &) const
void setComment(std::string const &value)
virtual void fillAdditionalVariables(const T &)
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:26
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:125
std::vector< StringObjectFunction< std::vector< float > > > output_formulas_
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:517
void setAllowAnything()
allow any parameter label/value pairs
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:68
void insert(const H &h, I begin, I end)
Definition: ValueMap.h:53
void produce(edm::Event &, const edm::EventSetup &) override
std::vector< std::vector< float > > FloatArrays
Definition: ONNXRuntime.h:23
void setValue(const std::string var, float val)
tensorflow::GraphDef * graph_
std::unique_ptr< cms::Ort::ONNXRuntime > ort_
BaseMVAValueMapProducer(const edm::ParameterSet &iConfig)
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:169
std::pair< std::string, Tensor > NamedTensor
Definition: TensorFlow.h:25
int iEvent
Definition: GenABIO.cc:224
std::vector< std::pair< std::string, StringObjectFunction< T, true > > > funcs_
std::vector< std::string > variablesOrder_
ParameterDescriptionBase * add(U const &iLabel, T const &value)
int k[5][pyjets_maxn]
edm::EDGetTokenT< edm::View< T > > src_
void setLogging(const std::string &level="3")
Definition: TensorFlow.cc:14
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
tensorflow::Session * session_
Analysis-level electron class.
Definition: Electron.h:52
std::map< std::string, size_t > positions_
Analysis-level calorimeter jet class.
Definition: Jet.h:80
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TMVA::IMethod * loadTMVAWeights(TMVA::Reader *reader, const std::string &method, const std::string &weightFile, bool verbose=false)
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, const std::vector< std::string > &targetNodes, std::vector< Tensor > *outputs)
Definition: TensorFlow.cc:210
void beginStream(edm::StreamID) override
long double T
uint32_t dimension(pat::CandKinResolution::Parametrization parametrization)
Returns the number of free parameters in a parametrization (3 or 4)
Analysis-level muon class.
Definition: Muon.h:51
def move(src, dest)
Definition: eostools.py:511
std::vector< std::string > output_names_
virtual void readAdditionalCollections(edm::Event &, const edm::EventSetup &)
to be implemented in derived classes, filling values for additional variables