EgammaDNNHelper.cc
#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"

// Framework headers (assumed from the usage below) providing edm::FileInPath, LogDebug and cms::Exception
#include "FWCore/ParameterSet/interface/FileInPath.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/Utilities/interface/Exception.h"
#include <iostream>
#include <fstream>
using namespace egammaTools;

EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
                                 const ModelSelector& modelSelector,
                                 const std::vector<std::string>& availableVars)
    : cfg_(cfg), modelSelector_(modelSelector), nModels_(cfg_.modelsFiles.size()), graphDefs_(cfg_.modelsFiles.size()) {
  initTensorFlowGraphs();
  initScalerFiles(availableVars);
}
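As an illustration (not part of this file), a minimal construction sketch. Every path, tensor name, variable name and value below is a hypothetical placeholder; only the configuration fields actually used in this file (modelsFiles, scalersFiles, inputTensorName, outputTensorName, outputDim) are filled, and DNNConfiguration and ModelSelector are assumed to live in the egammaTools namespace, as the using-directive above suggests.

// Hypothetical free function, for illustration only
void exampleConfiguration() {
  egammaTools::DNNConfiguration config;
  config.inputTensorName = "inputs";                          // placeholder tensor name
  config.outputTensorName = "outputs";                        // placeholder tensor name
  config.outputDim = 5;                                       // example number of DNN outputs
  config.modelsFiles = {"SomePackage/data/model_barrel.pb",   // hypothetical paths, resolved
                        "SomePackage/data/model_endcap.pb"};  // via edm::FileInPath above
  config.scalersFiles = {"SomePackage/data/scaler_barrel.txt",
                         "SomePackage/data/scaler_endcap.txt"};
  // Selector returning the model index for a candidate, e.g. by detector region
  egammaTools::ModelSelector selector = [](const std::map<std::string, float>& vars) -> uint {
    return vars.at("absEta") < 1.479f ? 0 : 1;  // "absEta" is a hypothetical input variable
  };
  std::vector<std::string> availableVars{"absEta", "sigmaIetaIeta", "hadronicOverEm", "r9"};  // hypothetical
  egammaTools::EgammaDNNHelper helper(config, selector, availableVars);
  // helper.getSessions() and helper.evaluate(...) can then be used as sketched further below
}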

void EgammaDNNHelper::initTensorFlowGraphs() {
  // load the graph definition
  LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs";
  size_t i = 0;
  for (const auto& model_file : cfg_.modelsFiles) {
    graphDefs_[i] =
        std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(model_file).fullPath()));
    i++;
  }
}

std::vector<tensorflow::Session*> EgammaDNNHelper::getSessions() const {
  std::vector<tensorflow::Session*> sessions;
  LogDebug("EgammaDNNHelper") << "Starting " << nModels_ << " TF sessions";
  for (const auto& graphDef : graphDefs_) {
    sessions.push_back(tensorflow::createSession(graphDef.get()));
  }
  LogDebug("EgammaDNNHelper") << "TF sessions started";
  return sessions;
}
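A brief sketch of the assumed session lifetime: getSessions() only creates the sessions, so the caller presumably owns the returned pointers and closes them when the evaluations are done. The cleanup call tensorflow::closeSession is assumed to be available from the CMSSW TensorFlow interface; the function name below is hypothetical.

// Sketch under the assumptions above
void exampleSessionLifetime(const egammaTools::EgammaDNNHelper& helper) {
  std::vector<tensorflow::Session*> sessions = helper.getSessions();
  // ... run helper.evaluate(candidates, sessions) as many times as needed ...
  for (auto*& session : sessions)
    tensorflow::closeSession(session);  // assumed cleanup call from the CMSSW TensorFlow interface
}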

void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
  for (const auto& scaler_file : cfg_.scalersFiles) {
    // Parse the scaler configuration
    std::vector<ScalerConfiguration> features;
    std::ifstream inputfile_scaler{edm::FileInPath(scaler_file).fullPath()};
    int ninputs = 0;
    if (inputfile_scaler.fail()) {
      throw cms::Exception("MissingFile") << "Scaler file for Electron PFid DNN not found";
    } else {
      // Now read the mean and scale factors for each variable
      float par1, par2;
      std::string varName, type_str;
      uint type;
      while (inputfile_scaler >> varName >> type_str >> par1 >> par2) {
        if (type_str == "stdscale")
          type = 1;
        else if (type_str == "minmax")
          type = 2;
        else if (type_str == "custom1")  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
          type = 3;
        else
          type = 0;
        features.push_back(ScalerConfiguration{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
        // Protection against a mismatch between the requested variables and the available ones
        auto match = std::find(availableVars.begin(), availableVars.end(), varName);
        if (match == std::end(availableVars)) {
          throw cms::Exception("MissingVariable")
              << "Requested variable (" << varName << ") not available among the DNN inputs";
        }
        ninputs += 1;
      }
    }
    inputfile_scaler.close();
    featuresMap_.push_back(features);
    nInputs_.push_back(ninputs);
  }
}
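For reference, the parsing loop above reads whitespace-separated records of the form "varName type par1 par2", one per DNN input, in the order expected by the model. From the scaling formulas in getScaledInputs below, par1 and par2 act as mean and standard deviation for "stdscale", and as minimum and maximum for "minmax" and "custom1"; any other type token disables scaling. A hypothetical scaler file (invented variable names and numbers) could therefore look like:

sigmaIetaIeta   stdscale  0.0105  0.0030
hadronicOverEm  minmax    0.0     0.25
r9              custom1   0.2     1.0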

std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
    const std::map<std::string, float>& variables) const {
  // Call the modelSelector function, passing the variables map, to get
  // the modelIndex to be used for the current candidate
  const auto modelIndex = modelSelector_(variables);
  std::vector<float> inputs;
  // Loop over the list of requested variables and scaling parameters for the specific modelIndex.
  // Different types of scaling are available: 0 = no scaling, 1 = standard scaler,
  // 2 = min-max, 3 = custom scaling to [-1, 1]
  for (auto& [varName, type, par1, par2] : featuresMap_[modelIndex]) {
    if (type == 1)  // Standard scaling
      inputs.push_back((variables.at(varName) - par1) / par2);
    else if (type == 2)  // MinMax
      inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
    else if (type == 3)  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
      inputs.push_back(2 * (variables.at(varName) - par1) / (par2 - par1) - 1.);
    else {
      inputs.push_back(variables.at(varName));  // Do nothing to the variable
    }
    // The protection against a mismatch between the requested variables and the available
    // ones is applied when the scaler configurations are loaded, so here the variables
    // are known to be available.
  }
  return std::make_pair(modelIndex, inputs);
}
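As a worked example using the hypothetical scaler entries above: hadronicOverEm = 0.05 with the minmax rule (par1 = 0.0, par2 = 0.25) becomes (0.05 - 0.0) / (0.25 - 0.0) = 0.2; sigmaIetaIeta = 0.0102 with stdscale (mean 0.0105, standard deviation 0.0030) becomes (0.0102 - 0.0105) / 0.0030 = -0.1; r9 = 0.94 with custom1 (min 0.2, max 1.0) becomes 2 * (0.94 - 0.2) / (1.0 - 0.2) - 1.0 = 0.85.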

std::vector<std::vector<float>> EgammaDNNHelper::evaluate(const std::vector<std::map<std::string, float>>& candidates,
                                                          const std::vector<tensorflow::Session*>& sessions) const {
  /*
  Evaluate the PFID DNN for all the electrons/photons.
  nModels_ models are defined; each candidate is assigned a modelIndex, so we need to build
  nModels_ input tensors to evaluate the DNNs with batching.

  1) Get all the variables for each candidate (vector<map<string, float>>)
  2) Scale the inputs and select the variables for each model
  3) Prepare the input tensors for the models
  4) Run the models and get the output for each candidate
  5) Sort the outputs by candidate index
  6) Return the DNN outputs

  */
  size_t nCandidates = candidates.size();
  std::vector<std::vector<int>> indexMap(nModels_);  // for each model, the list of candidate indices is saved
  std::vector<std::vector<float>> inputsVectors(nCandidates);
  std::vector<uint> counts(nModels_);

  LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";

  int icand = 0;
  for (auto& candidate : candidates) {
    LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
    const auto& [model_index, inputs] = getScaledInputs(candidate);
    counts[model_index] += 1;
    indexMap[model_index].push_back(icand);
    inputsVectors[icand] = inputs;
    icand++;
  }

  // Prepare one input tensor for each model
  std::vector<tensorflow::Tensor> input_tensors(nModels_);
  // Pointers for efficiently filling the input tensors
  std::vector<float*> input_tensors_pointer(nModels_);
  for (size_t i = 0; i < nModels_; i++) {
    LogDebug("EgammaDNNHelper") << "Initializing TF input " << i << " with rows:" << counts[i]
                                << " and cols:" << nInputs_[i];
    input_tensors[i] = tensorflow::Tensor{tensorflow::DT_FLOAT, {counts[i], nInputs_[i]}};
    input_tensors_pointer[i] = input_tensors[i].flat<float>().data();
  }

  // Fill the input tensors
  for (size_t m = 0; m < nModels_; m++) {
    LogDebug("EgammaDNNHelper") << "Loading TF input tensor for model: " << m;
    float* T = input_tensors_pointer[m];
    for (size_t cand_index : indexMap[m]) {
      for (size_t k = 0; k < nInputs_[m]; k++, T++) {  // Note: the input tensor pointer is incremented
        *T = inputsVectors[cand_index][k];
      }
    }
  }

  // Define the output and run
  std::vector<std::pair<int, std::vector<float>>> outputs;
  // Run all the models
  for (size_t m = 0; m < nModels_; m++) {
    if (counts[m] == 0)
      continue;  // Skip models without inputs
    std::vector<tensorflow::Tensor> output;
    LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << " electrons";
    tensorflow::run(sessions[m], {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
    // Get the output and save the cfg_.outputDim numbers along with the candidate index
    const auto& r = output[0].tensor<float, 2>();
    // Iterate over the elements in the batch --> many electrons
    for (uint b = 0; b < counts[m]; b++) {
      std::vector<float> result(cfg_.outputDim);
      for (size_t k = 0; k < cfg_.outputDim; k++)
        result[k] = r(b, k);
      // Get the index of the electron in the original order
      const auto cand_index = indexMap[m][b];
      outputs.push_back(std::make_pair(cand_index, result));
    }
  }
  // Now we just have to re-order the outputs
  std::sort(outputs.begin(), outputs.end());
  std::vector<std::vector<float>> final_outputs(outputs.size());
  std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](auto a) { return a.second; });

  return final_outputs;
}
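Finally, a hedged end-to-end sketch of how a caller might drive this helper for a small batch of candidates; the function name, variable names and values are invented placeholders matching the hypothetical configuration sketched after the constructor above.

// Hypothetical driver: evaluate a batch of candidates and read back the scores
std::vector<std::vector<float>> exampleEvaluate(const egammaTools::EgammaDNNHelper& helper,
                                                const std::vector<tensorflow::Session*>& sessions) {
  std::vector<std::map<std::string, float>> candidates;
  candidates.push_back({{"absEta", 0.5f}, {"sigmaIetaIeta", 0.0102f}, {"hadronicOverEm", 0.05f}, {"r9", 0.94f}});
  candidates.push_back({{"absEta", 2.1f}, {"sigmaIetaIeta", 0.0251f}, {"hadronicOverEm", 0.10f}, {"r9", 0.88f}});
  const auto scores = helper.evaluate(candidates, sessions);
  // scores[i] holds the cfg_.outputDim DNN outputs for candidates[i], in the input order;
  // the sessions should be closed afterwards as sketched after getSessions() above.
  return scores;
}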