CMS 3D CMS Logo

BaseMVAValueMapProducer.h
Go to the documentation of this file.
1 #ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
2 #define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
3 
4 // -*- C++ -*-
5 //
6 // Package: PhysicsTools/PatAlgos
7 // Class: BaseMVAValueMapProducer
8 //
16 //
17 // Original Author: Andre Rizzi
18 // Created: Mon, 07 Sep 2017 09:18:03 GMT
19 //
20 //
21 
22 // system include files
23 #include <memory>
24 
25 // user include files
28 
31 
34 
35 #include "TMVA/Factory.h"
36 #include "TMVA/Reader.h"
37 
44 
49 
50 #include <string>
51 //
52 // class declaration
53 //
54 
55 template <typename T>
57 public:
58  explicit BaseMVAValueMapProducer(const edm::ParameterSet& iConfig)
59  : src_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("src"))),
60  variablesOrder_(iConfig.getParameter<std::vector<std::string>>("variablesOrder")),
61  singleThreadPool_(iConfig.getParameter<std::string>("singleThreadPool")),
62  name_(iConfig.getParameter<std::string>("name")),
63  backend_(iConfig.getParameter<std::string>("backend")),
64  weightfilename_(iConfig.getParameter<edm::FileInPath>("weightFile").fullPath()),
65  isClassifier_(iConfig.getParameter<bool>("isClassifier")),
66  tmva_(backend_ == "TMVA"),
67  tf_(backend_ == "TF") {
68  if (tmva_)
69  reader_ = new TMVA::Reader();
70  edm::ParameterSet const& varsPSet = iConfig.getParameter<edm::ParameterSet>("variables");
71  for (const std::string& vname : varsPSet.getParameterNamesForType<std::string>()) {
72  funcs_.emplace_back(
73  std::pair<std::string, StringObjectFunction<T, true>>(vname, varsPSet.getParameter<std::string>(vname)));
74  }
75 
76  values_.resize(variablesOrder_.size());
77  size_t i = 0;
78  for (const auto& v : variablesOrder_) {
79  positions_[v] = i;
80  if (tmva_)
81  reader_->AddVariable(v, (&values_.front()) + i);
82  i++;
83  }
84  // reader_.BookMVA(name_,iConfig.getParameter<edm::FileInPath>("weightFile").fullPath() );
85  if (tmva_) {
87  } else if (tf_) {
90  inputTensorName_ = iConfig.getParameter<std::string>("inputTensorName");
91  outputTensorName_ = iConfig.getParameter<std::string>("outputTensorName");
92  output_names_ = iConfig.getParameter<std::vector<std::string>>("outputNames");
93  for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
94  output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
95  }
96  size_t nThreads = iConfig.getParameter<unsigned int>("nThreads");
98 
99  } else {
100  throw cms::Exception("ConfigError") << "Only 'TF' and 'TMVA' backends are supported\n";
101  }
102  if (tmva_)
103  produces<edm::ValueMap<float>>();
104  else {
105  for (const auto& n : output_names_) {
106  produces<edm::ValueMap<float>>(n);
107  }
108  }
109  }
111 
112  void setValue(const std::string var, float val) {
113  if (positions_.find(var) != positions_.end())
115  }
116 
118  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
119 
120 private:
121  void beginStream(edm::StreamID) override{};
122  void produce(edm::Event&, const edm::EventSetup&) override;
123  void endStream() override{};
124 
127  virtual void fillAdditionalVariables(const T&) {}
128 
130  std::map<std::string, size_t> positions_;
131  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
132  std::vector<std::string> variablesOrder_;
133  std::vector<float> values_;
134  TMVA::Reader* reader_;
135  tensorflow::GraphDef* graph_;
136  tensorflow::Session* session_;
138 
143  bool tmva_;
144  bool tf_;
147  std::vector<std::string> output_names_;
148  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_;
149 };
150 
151 template <typename T>
154  iEvent.getByToken(src_, src);
155  readAdditionalCollections(iEvent, iSetup);
156  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
157  for (auto& v : mvaOut)
158  v.reserve(src->size());
159 
160  for (auto const& o : *src) {
161  for (auto const& p : funcs_) {
162  setValue(p.first, p.second(o));
163  }
164  fillAdditionalVariables(o);
165  if (tmva_) {
166  mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
167  }
168  if (tf_) {
169 //currently supports only one input tensor, to reuse the TMVA-like config
170  tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
171  tensorflow::NamedTensorList input_tensors;
172  input_tensors.resize(1);
173  input_tensors[0] =
174  tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
175  for (size_t j = 0; j < values_.size(); j++) {
176  input_tensors[0].second.matrix<float>()(0, j) = values_[j];
177  }
178  std::vector<tensorflow::Tensor> outputs;
179  std::vector<std::string> names;
180  names.push_back(outputTensorName_);
181  tensorflow::run(session_, input_tensors, names, &outputs, singleThreadPool_);
182  std::vector<float> tmpOut;
183  for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
184  tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
185  for (size_t k = 0; k < output_names_.size(); k++)
186  mvaOut[k].push_back(output_formulas_[k](tmpOut));
187  }
188  }
189  size_t k = 0;
190  for (auto& m : mvaOut) {
191  std::unique_ptr<edm::ValueMap<float>> mvaV(new edm::ValueMap<float>());
193  filler.insert(src, m.begin(), m.end());
194  filler.fill();
195  iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
196  k++;
197  }
198 }
199 
200 template <typename T>
203  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
204  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
205  desc.add<std::string>("name")->setComment("output score variable name");
206  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
208  variables.setAllowAnything();
209  desc.add<edm::ParameterSetDescription>("variables", variables)->setComment("list of input variable definitions");
210  desc.add<edm::FileInPath>("weightFile")->setComment("xml weight file");
211  desc.add<std::string>("backend", "TMVA")->setComment("TMVA or TF");
212  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
213  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
214  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
215  ->setComment("Names of the output values to be used in the output valuemap");
216  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
217  ->setComment("Formulas to be used to post process the output");
218  desc.add<unsigned int>("nThreads", 1)->setComment("number of threads");
219  desc.add<std::string>("singleThreadPool", "no_threads");
220 
221  return desc;
222 }
223 
224 template <typename T>
226  edm::ParameterSetDescription desc = getDescription();
228  if (typeid(T) == typeid(pat::Jet))
229  modname += "Jet";
230  else if (typeid(T) == typeid(pat::Muon))
231  modname += "Muon";
232  else if (typeid(T) == typeid(pat::Electron))
233  modname += "Ele";
234  modname += "BaseMVAValueMapProducer";
235  descriptions.add(modname, desc);
236 }
237 
238 #endif
BaseMVAValueMapProducer::output_names_
std::vector< std::string > output_names_
Definition: BaseMVAValueMapProducer.h:147
tensorflow::createSession
Session * createSession(SessionOptions &sessionOptions)
Definition: TensorFlow.cc:85
edm::StreamID
Definition: StreamID.h:30
pat::helper::ParametrizationHelper::dimension
uint32_t dimension(pat::CandKinResolution::Parametrization parametrization)
Returns the number of free parameters in a parametrization (3 or 4)
Definition: ParametrizationHelper.h:12
BaseMVAValueMapProducer::readAdditionalCollections
virtual void readAdditionalCollections(edm::Event &, const edm::EventSetup &)
to be implemented in derived classes, filling values for additional variables
Definition: BaseMVAValueMapProducer.h:126
reco::JetExtendedAssociation::setValue
bool setValue(Container &, const reco::JetBaseRef &, const JetExtendedData &)
associate jet with value. Returns false and associate nothing if jet is already associated
Definition: JetExtendedAssociation.cc:44
electrons_cff.bool
bool
Definition: electrons_cff.py:372
BaseMVAValueMapProducer::backend_
std::string backend_
Definition: BaseMVAValueMapProducer.h:140
mps_fire.i
i
Definition: mps_fire.py:355
edm::ParameterSetDescription::add
ParameterDescriptionBase * add(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:95
StringObjectFunction< T, true >
TensorFlow.h
BaseMVAValueMapProducer::produce
void produce(edm::Event &, const edm::EventSetup &) override
Definition: BaseMVAValueMapProducer.h:152
BaseMVAValueMapProducer::endStream
void endStream() override
Definition: BaseMVAValueMapProducer.h:123
dqmiodumpmetadata.n
n
Definition: dqmiodumpmetadata.py:28
sistrip::View
View
Definition: ConstantsForView.h:26
BaseMVAValueMapProducer::output_formulas_
std::vector< StringObjectFunction< std::vector< float > > > output_formulas_
Definition: BaseMVAValueMapProducer.h:148
L1TEGammaDiff_cfi.variables
variables
Definition: L1TEGammaDiff_cfi.py:5
edm::EDGetTokenT
Definition: EDGetToken.h:33
contentValuesFiles.fullPath
fullPath
Definition: contentValuesFiles.py:64
edm
HLT enums.
Definition: AlignableModifier.h:19
Muon.h
AlCaHLTBitMon_ParallelJobs.p
p
Definition: AlCaHLTBitMon_ParallelJobs.py:153
tensorflow::NamedTensor
std::pair< std::string, Tensor > NamedTensor
Definition: TensorFlow.h:29
BaseMVAValueMapProducer::setValue
void setValue(const std::string var, float val)
Definition: BaseMVAValueMapProducer.h:112
BaseMVAValueMapProducer::values_
std::vector< float > values_
Definition: BaseMVAValueMapProducer.h:133
PatBasicFWLiteJetAnalyzer_Selector_cfg.outputs
outputs
Definition: PatBasicFWLiteJetAnalyzer_Selector_cfg.py:48
BaseMVAValueMapProducer::outputTensorName_
std::string outputTensorName_
Definition: BaseMVAValueMapProducer.h:146
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
BaseMVAValueMapProducer::BaseMVAValueMapProducer
BaseMVAValueMapProducer(const edm::ParameterSet &iConfig)
Definition: BaseMVAValueMapProducer.h:58
EDProducer.h
BaseMVAValueMapProducer::tmva_
bool tmva_
Definition: BaseMVAValueMapProducer.h:143
pat::Muon
Analysis-level muon class.
Definition: Muon.h:51
findQualityFiles.v
v
Definition: findQualityFiles.py:179
BaseMVAValueMapProducer::inputTensorName_
std::string inputTensorName_
Definition: BaseMVAValueMapProducer.h:145
edm::Handle
Definition: AssociativeIterator.h:50
BaseMVAValueMapProducer::graph_
tensorflow::GraphDef * graph_
Definition: BaseMVAValueMapProducer.h:135
BaseMVAValueMapProducer::weightfilename_
std::string weightfilename_
Definition: BaseMVAValueMapProducer.h:141
EcalTangentSkim_cfg.o
o
Definition: EcalTangentSkim_cfg.py:36
BaseMVAValueMapProducer::funcs_
std::vector< std::pair< std::string, StringObjectFunction< T, true > > > funcs_
Definition: BaseMVAValueMapProducer.h:131
trigObjTnPSource_cfi.var
var
Definition: trigObjTnPSource_cfi.py:21
edm::FileInPath
Definition: FileInPath.h:64
MakerMacros.h
alignCSCRings.s
s
Definition: alignCSCRings.py:92
pat::Jet
Analysis-level calorimeter jet class.
Definition: Jet.h:77
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition: ConfigurationDescriptions.cc:57
names
const std::string names[nVars_]
Definition: PhotonIDValueMapProducer.cc:122
visualization-live-secondInstance_cfg.m
m
Definition: visualization-live-secondInstance_cfg.py:72
BaseMVAValueMapProducer::variablesOrder_
std::vector< std::string > variablesOrder_
Definition: BaseMVAValueMapProducer.h:132
dqmdumpme.k
k
Definition: dqmdumpme.py:60
runTheMatrix.nThreads
nThreads
Definition: runTheMatrix.py:344
BaseMVAValueMapProducer
Definition: BaseMVAValueMapProducer.h:56
edm::ConfigurationDescriptions
Definition: ConfigurationDescriptions.h:28
BaseMVAValueMapProducer::src_
edm::EDGetTokenT< edm::View< T > > src_
Definition: BaseMVAValueMapProducer.h:129
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
HLT_2018_cff.InputTag
InputTag
Definition: HLT_2018_cff.py:79016
edm::ParameterSet
Definition: ParameterSet.h:36
BaseMVAValueMapProducer::singleThreadPool_
std::string singleThreadPool_
Definition: BaseMVAValueMapProducer.h:137
TrackRefitter_38T_cff.src
src
Definition: TrackRefitter_38T_cff.py:24
Event.h
BaseMVAValueMapProducer::fillDescriptions
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
Definition: BaseMVAValueMapProducer.h:225
trigObjTnPSource_cfi.filler
filler
Definition: trigObjTnPSource_cfi.py:21
tensorflow::NamedTensorList
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:30
createfilelist.int
int
Definition: createfilelist.py:10
edm::ParameterSet::getParameterNamesForType
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:168
iEvent
int iEvent
Definition: GenABIO.cc:224
edm::stream::EDProducer
Definition: EDProducer.h:38
tensorflow::setLogging
void setLogging(const std::string &level="3")
Definition: TensorFlow.cc:15
edm::EventSetup
Definition: EventSetup.h:57
BaseMVAValueMapProducer::session_
tensorflow::Session * session_
Definition: BaseMVAValueMapProducer.h:136
Jet.h
BaseMVAValueMapProducer::~BaseMVAValueMapProducer
~BaseMVAValueMapProducer() override
Definition: BaseMVAValueMapProducer.h:110
ValueMap.h
tensorflow::loadGraphDef
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:68
edm::ParameterSet::getParameter
T getParameter(std::string const &) const
BaseMVAValueMapProducer::name_
std::string name_
Definition: BaseMVAValueMapProducer.h:139
heppy_batch.val
val
Definition: heppy_batch.py:351
eostools.move
def move(src, dest)
Definition: eostools.py:511
std
Definition: JetResolutionObject.h:76
BaseMVAValueMapProducer::positions_
std::map< std::string, size_t > positions_
Definition: BaseMVAValueMapProducer.h:130
Frameworkfwd.h
T
long double T
Definition: Basic3DVectorLD.h:48
edm::ValueMap< float >
Exception
Definition: hltDiff.cc:246
reco::details::loadTMVAWeights
TMVA::IMethod * loadTMVAWeights(TMVA::Reader *reader, const std::string &method, const std::string &weightFile, bool verbose=false)
Definition: TMVAZipReader.cc:52
tensorflow::run
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:211
TMVAZipReader.h
Electron.h
BaseMVAValueMapProducer::getDescription
static edm::ParameterSetDescription getDescription()
Definition: BaseMVAValueMapProducer.h:201
BaseMVAValueMapProducer::tf_
bool tf_
Definition: BaseMVAValueMapProducer.h:144
edm::helper::Filler
Definition: ValueMap.h:22
pat::Electron
Analysis-level electron class.
Definition: Electron.h:51
ParameterSet.h
dqmiolumiharvest.j
j
Definition: dqmiolumiharvest.py:66
edm::Event
Definition: Event.h:73
BaseMVAValueMapProducer::beginStream
void beginStream(edm::StreamID) override
Definition: BaseMVAValueMapProducer.h:121
StringObjectFunction.h
StreamID.h
timingPdfMaker.modname
modname
Definition: timingPdfMaker.py:218
edm::InputTag
Definition: InputTag.h:15
BaseMVAValueMapProducer::isClassifier_
bool isClassifier_
Definition: BaseMVAValueMapProducer.h:142
BaseMVAValueMapProducer::fillAdditionalVariables
virtual void fillAdditionalVariables(const T &)
Definition: BaseMVAValueMapProducer.h:127
BaseMVAValueMapProducer::reader_
TMVA::Reader * reader_
Definition: BaseMVAValueMapProducer.h:134