CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
BaseMVAValueMapProducer.h
Go to the documentation of this file.
1 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
2 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
3 
4 // -*- C++ -*-
5 //
6 // Package: PhysicsTools/PatAlgos
7 // Class: BaseMVAValueMapProducer
8 //
16 //
17 // Original Author: Andre Rizzi
18 // Created: Mon, 07 Sep 2017 09:18:03 GMT
19 //
20 //
21 
22 // system include files
23 #include <memory>
24 
25 // user include files
28 
31 
34 
35 #include "TMVA/Factory.h"
36 #include "TMVA/Reader.h"
37 
44 
50 
51 #include <string>
52 //
53 // class declaration
54 //
55 
// Cache object holding the model for the "TF" and "ONNX" inference backends; it is the
// type used as edm::GlobalCache by BaseMVAValueMapProducer.
// NOTE(review): this listing skips source lines 61 and 66. Nothing visible here ever assigns
// tf_session_; presumably the missing lines create and close the TensorFlow session
// (the page's symbol index mentions createSession/closeSession) - confirm against the real header.
56 class BaseMVACache {
57 public:
// Loads the model found at model_path according to backend:
//   "TF"   -> graph_ takes ownership of the GraphDef returned by tensorflow::loadGraphDef
//   "ONNX" -> ort_ is constructed from model_path
// For any other backend string the visible code leaves all members at their defaults.
58  BaseMVACache(const std::string& model_path, const std::string& backend) {
59  if (backend == "TF") {
60  graph_.reset(tensorflow::loadGraphDef(model_path));
62  } else if (backend == "ONNX") {
63  ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
64  }
65  }
67 
// Returns the stored TensorFlow session pointer (nullptr unless it has been assigned).
68  tensorflow::Session* getTFSession() const { return tf_session_; }
// Returns the ONNX runtime by reference; ort_ is only set in the "ONNX" branch of the
// constructor, so this must not be called for other backends.
69  const cms::Ort::ONNXRuntime& getONNXSession() const { return *ort_; }
70 
71 private:
// TensorFlow graph definition (set for the "TF" backend only).
72  std::shared_ptr<tensorflow::GraphDef> graph_;
// Raw session pointer handed out by getTFSession(); defaults to nullptr.
73  tensorflow::Session* tf_session_ = nullptr;
// ONNX runtime instance (set for the "ONNX" backend only).
74  std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
75 };
76 
// Stream EDProducer template that evaluates an MVA for the objects of an edm::View<T> and
// registers edm::ValueMap<float> products for the results. The backend is selected by the
// "backend" configuration string: "TMVA", "TF" or "ONNX".
// NOTE(review): this listing omits several source lines (80, 113, 132, 142, 150-151, 154,
// 161-164, 168-170), among them the constructor signature and some member declarations.
// Consult the real header before changing anything here.
77 template <typename T>
78 class BaseMVAValueMapProducer : public edm::stream::EDProducer<edm::GlobalCache<BaseMVACache>> {
79 public:
// Constructor (its signature, source line 80, is missing from this listing).
// The initializer list reads the module configuration and derives one flag per backend.
81  : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
82  variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
83  name_(iConfig.getParameter<std::string>("name")),
84  backend_(iConfig.getParameter<std::string>("backend")),
85  weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
86  isClassifier_(iConfig.getParameter<bool>("isClassifier")),
87  tmva_(backend_ == "TMVA"),
88  tf_(backend_ == "TF"),
89  onnx_(backend_ == "ONNX"),
90  batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
// Reject any backend string other than the three supported ones.
91  if (!(tmva_ || tf_ || onnx_)) {
92  throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
93  }
94 
// The TMVA reader is allocated with a raw new and only for the TMVA backend; reader_ has no
// default initializer, so it stays uninitialized for the other backends.
// NOTE(review): no matching delete is visible in this listing (source line 132 is missing) - confirm.
95  if (tmva_)
96  reader_ = new TMVA::Reader();
// Every string-typed entry of the "variables" PSet becomes a (name, StringObjectFunction) pair:
// the entry's value is the expression that is evaluated on an object of type T.
97  edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
98  for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
99  funcs_.emplace_back(
100  std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
101  }
102 
// values_ gets one slot per entry of variablesOrder_, and positions_ maps each variable name to
// its slot index. For TMVA the reader is handed the address of each slot, so values_ must
// keep its storage (no resizing) after this point.
103  values_.resize(variablesOrder_.size());
104  size_t i = 0;
105  for (const auto& v : variablesOrder_) {
106  positions_[v] = i;
107  if (tmva_)
108  reader_->AddVariable(v, (&values_.front()) + i);
109  i++;
110  }
111  // reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
// NOTE(review): the body of this branch (source line 113) is missing from the listing; the
// symbol index mentions loadTMVAWeights, which presumably is called here - confirm.
112  if (tmva_) {
114  }
// TF and ONNX backends additionally need the tensor names, the names of the output
// products, and one post-processing formula per output.
115  if (tf_ || onnx_) {
116  inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
117  outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
118  output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
119  for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
120  output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
121  }
122  }
123 
// TMVA registers a single ValueMap without instance label; the other backends register one
// ValueMap per configured output name.
124  if (tmva_)
125  produces<edm::ValueMap<float>>();
126  else {
127  for (const auto& n : output_names_) {
128  produces<edm::ValueMap<float>>(n);
129  }
130  }
131  }
133 
// Stores val in the slot belonging to variable var; names that are not present in
// positions_ are silently ignored.
134  void setValue(const std::string var, float val) {
135  if (positions_.find(var) != positions_.end())
136  values_[positions_[var]] = val;
137  }
138 
// Static hooks that go with the edm::GlobalCache<BaseMVACache> base-class parameter.
139  static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg);
140  static void globalEndJob(const BaseMVACache* cache);
141 
143  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
144 
145 private:
146  void beginStream(edm::StreamID) override{};
147  void produce(edm::Event&, const edm::EventSetup&) override;
148  void endStream() override{};
149 
// Hook called by produce() once per object after the configured variables have been set;
// the default implementation does nothing.
152  virtual void fillAdditionalVariables(const T&) {}
153 
// Variable name -> index into values_.
155  std::map<std::string, size_t> positions_;
// (variable name, expression evaluated on T) pairs built from the "variables" PSet.
156  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
// Input variable names in the order configured by "variablesOrder".
157  std::vector<std::string> variablesOrder_;
// Current input values, one slot per entry of variablesOrder_.
158  std::vector<float> values_;
// TMVA reader; assigned only when the backend is "TMVA".
159  TMVA::Reader* reader_;
160 
// Backend flags derived from the "backend" string; the constructor throws unless one is set.
165  bool tmva_;
166  bool tf_;
167  bool onnx_;
// Output product instance names and the formula applied to the raw model output for each.
171  std::vector<std::string> output_names_;
172  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
173 };
174 
// Definition of BaseMVAValueMapProducer<T>::produce: evaluates the MVA for every object of the
// input collection and puts one edm::ValueMap<float> per output into the event.
// NOTE(review): source lines 176-177 (the function signature and, presumably, the declaration
// of the handle `src`) and line 212 (presumably the declaration of `inputs` used in the ONNX
// batch branch) are missing from this listing - confirm against the real header.
175 template <typename T>
178  iEvent.getByToken(src_, src);
179  readAdditionalCollections(iEvent, iSetup);
// One result vector per output product: a single one for TMVA, otherwise one per output name.
180  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
181  for (auto& v : mvaOut)
182  v.reserve(src->size());
183 
184  if (batch_eval_) {
// Batch mode: a single inference call for the whole collection. Skipped entirely for an empty
// collection, which also protects the division by src->size() further down.
// NOTE(review): only the TF and ONNX backends are handled in this branch. With backend "TMVA"
// and batch_eval set, nothing is appended to mvaOut[0] here (output_names_ is only filled for
// TF/ONNX), so the map filled at the end would not match the collection size - confirm intent.
185  if (!src->empty()) {
// Flatten the inputs: the values_ of each object are appended one object after the other.
186  std::vector<float> data;
187  data.reserve(src->size() * positions_.size());
188  for (auto const& o : *src) {
189  for (auto const& p : funcs_) {
190  setValue(p.first, p.second(o));
191  }
192  fillAdditionalVariables(o);
193  data.insert(data.end(), values_.begin(), values_.end());
194  }
195 
196  std::vector<float> outputs;
197  if (tf_) {
// Input tensor of shape (number of objects, number of variables), filled element by element
// from the flattened data.
198  tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
199  tensorflow::NamedTensorList input_tensors;
200  input_tensors.resize(1);
201  input_tensors[0] =
202  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
203  for (unsigned i = 0; i < data.size(); ++i) {
204  input_tensors[0].second.flat<float>()(i) = data[i];
205  }
206  std::vector<tensorflow::Tensor> output_tensors;
207  tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
// Copy the first output tensor into a flat vector.
208  for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
209  outputs.push_back(output_tensors.at(0).flat<float>()(i));
210  }
211  } else if (onnx_) {
213  outputs =
214  globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
215  }
216 
// Split the flat output into equally sized slices, one per object, and apply every output
// formula to the slice of each object.
217  const unsigned outdim = outputs.size() / src->size();
218  for (unsigned i = 0; i < src->size(); ++i) {
219  std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
220  for (size_t k = 0; k < output_names_.size(); k++) {
221  mvaOut[k].push_back(output_formulas_[k](tmpOut));
222  }
223  }
224  }
225  } else {
// Per-object mode: set the input values and run one inference call for each object.
226  for (auto const& o : *src) {
227  for (auto const& p : funcs_) {
228  setValue(p.first, p.second(o));
229  }
230  fillAdditionalVariables(o);
231  if (tmva_) {
// TMVA: classifier response, or the first regression target, depending on isClassifier_.
232  mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
233  } else {
234  std::vector<float> tmpOut;
235  if (tf_) {
236  //currently support only one input tensor to reuse the TMVA like config
237  tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
238  tensorflow::NamedTensorList input_tensors;
239  input_tensors.resize(1);
240  input_tensors[0] =
241  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
242  for (size_t j = 0; j < values_.size(); j++) {
243  input_tensors[0].second.matrix<float>()(0, j) = values_[j];
244  }
245  std::vector<tensorflow::Tensor> outputs;
246  tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
// Read row 0 of the first output tensor.
247  for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
248  tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
249  } else if (onnx_) {
250  cms::Ort::FloatArrays inputs{values_};
251  tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];
252  }
// Apply each output formula to the raw model output of this object.
253  for (size_t k = 0; k < output_names_.size(); k++)
254  mvaOut[k].push_back(output_formulas_[k](tmpOut));
255  }
256  }
257  }
258 
// Publish one ValueMap per result vector, keyed to the input collection. TMVA uses an empty
// instance label; the other backends use the configured output names.
259  size_t k = 0;
260  for (auto& m : mvaOut) {
261  std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
262  edm::ValueMap<float>::Filler filler(*mvaV);
263  filler.insert(src, m.begin(), m.end());
264  filler.fill();
265  iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
266  k++;
267  }
268 }
269 
// Builds the BaseMVACache from the module configuration: the full path of the "weightFile"
// parameter is passed as model path and the "backend" string selects what gets loaded.
// NOTE(review): the signature (source line 271) is missing from this listing; the class
// declares `static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg)`.
270 template <typename T>
272  return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
273  cfg.getParameter<std::string>("backend"));
274 }
275 
276 template <typename T>
278 
// Builds and returns the ParameterSetDescription of the module configuration.
// NOTE(review): source lines 280-281 and 286 are missing from this listing - presumably the
// function signature and the declarations of `desc` and `variables`; confirm against the real header.
279 template <typename T>
// Parameters declared without a default value.
282  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
283  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
284  desc.add<std::string>("name")->setComment("output score variable name");
285  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
// The nested "variables" PSet accepts any label/value pairs.
287  variables.setAllowAnything();
288  desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
289  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
// Parameters with defaults: backend "TMVA", empty tensor names, empty output lists,
// batch evaluation off.
290  desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
291  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
292  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
293  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
294  ->setComment("Names of the output values to be used in the output valuemap");
295  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
296  ->setComment("Formulas to be used to post process the output");
297  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
298 
299  return desc;
300 }
301 
// Registers the description returned by getDescription() under a module name that depends on T:
// the prefix "Jet", "Muon" or "Ele" for pat::Jet, pat::Muon or pat::Electron (no prefix for
// any other type), followed by "BaseMVAValueMapProducer".
// NOTE(review): source lines 303 and 305 are missing from this listing - presumably the
// function signature and the declaration of `modname`; confirm against the real header.
302 template <typename T>
304  edm::ParameterSetDescription desc = getDescription();
306  if (typeid(T) == typeid(pat::Jet))
307  modname += "Jet";
308  else if (typeid(T) == typeid(pat::Muon))
309  modname += "Muon";
310  else if (typeid(T) == typeid(pat::Electron))
311  modname += "Ele";
312  modname += "BaseMVAValueMapProducer";
313  descriptions.add(modname, desc);
314 }
315 
316 #endif
Session * createSession(SessionOptions &sessionOptions)
Definition: TensorFlow.cc:85
static edm::ParameterSetDescription getDescription()
void setComment(std::string const &value)
virtual void fillAdditionalVariables(const T &)
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:30
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:133
std::vector< StringObjectFunction< std::vector< float > > > output_formulas_
tuple cfg
Definition: looper.py:296
static std::unique_ptr< BaseMVACache > initializeGlobalCache(const edm::ParameterSet &cfg)
std::shared_ptr< tensorflow::GraphDef > graph_
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:539
void setAllowAnything()
allow any parameter label/value pairs
const cms::Ort::ONNXRuntime & getONNXSession() const
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:68
static void globalEndJob(const BaseMVACache *cache)
void insert(const H &h, I begin, I end)
Definition: ValueMap.h:53
void produce(edm::Event &, const edm::EventSetup &) override
std::vector< std::vector< float > > FloatArrays
Definition: ONNXRuntime.h:23
bool setValue(Container &, const reco::JetBaseRef &, const JetExtendedData &)
Associates the jet with a value. Returns false and associates nothing if the jet is already associated ...
void setValue(const std::string var, float val)
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:179
std::pair< std::string, Tensor > NamedTensor
Definition: TensorFlow.h:29
int iEvent
Definition: GenABIO.cc:224
std::vector< std::pair< std::string, StringObjectFunction< T, true > > > funcs_
std::vector< std::string > variablesOrder_
BaseMVACache(const std::string &model_path, const std::string &backend)
tensorflow::Session * getTFSession() const
list var
if using global norm cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal'] df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()) if (x.max() - x.min() > 0) else 1.0/200.0)
def move
Definition: eostools.py:511
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:213
bool closeSession(Session *&session)
Definition: TensorFlow.cc:198
ParameterDescriptionBase * add(U const &iLabel, T const &value)
std::unique_ptr< cms::Ort::ONNXRuntime > ort_
edm::EDGetTokenT< edm::View< T > > src_
T getParameter(std::string const &) const
Definition: ParameterSet.h:303
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Analysis-level electron class.
Definition: Electron.h:51
std::map< std::string, size_t > positions_
Analysis-level calorimeter jet class.
Definition: Jet.h:77
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TMVA::IMethod * loadTMVAWeights(TMVA::Reader *reader, const std::string &method, const std::string &weightFile, bool verbose=false)
tensorflow::Session * tf_session_
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
void beginStream(edm::StreamID) override
BaseMVAValueMapProducer(const edm::ParameterSet &iConfig, const BaseMVACache *cache)
long double T
uint32_t dimension(pat::CandKinResolution::Parametrization parametrization)
Returns the number of free parameters in a parametrization (3 or 4)
Analysis-level muon class.
Definition: Muon.h:51
std::vector< std::string > output_names_
virtual void readAdditionalCollections(edm::Event &, const edm::EventSetup &)
to be implemented in derived classes, filling values for additional variables
def cache
Definition: utilities.py:3