
EgammaDNNHelper.cc
#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/FileInPath.h"
#include "FWCore/Utilities/interface/Exception.h"

#include <algorithm>
#include <iostream>
#include <fstream>

using namespace egammaTools;

EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
                                 const ModelSelector& modelSelector,
                                 const std::vector<std::string>& availableVars)
    : cfg_(cfg), modelSelector_(modelSelector), nModels_(cfg_.modelsFiles.size()), graphDefs_(cfg_.modelsFiles.size()) {
  initTensorFlowGraphs();
  initScalerFiles(availableVars);
}

void EgammaDNNHelper::initTensorFlowGraphs() {
  // Load the graph definition of each configured model file
  LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs";
  size_t i = 0;
  for (const auto& model_file : cfg_.modelsFiles) {
    graphDefs_[i] =
        std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(model_file).fullPath()));
    i++;
  }
}

std::vector<tensorflow::Session*> EgammaDNNHelper::getSessions() const {
  std::vector<tensorflow::Session*> sessions;
  LogDebug("EgammaDNNHelper") << "Starting " << nModels_ << " TF sessions";
  sessions.reserve(graphDefs_.size());
  for (const auto& graphDef : graphDefs_) {
    sessions.push_back(tensorflow::createSession(graphDef.get()));
  }
  LogDebug("EgammaDNNHelper") << "TF sessions started";
  return sessions;
}
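// Note: the sessions returned by getSessions() are owned by the caller. They are created from the
// cached graph definitions and, assuming the usual PhysicsTools/TensorFlow interface, should be
// released with tensorflow::closeSession() once the evaluation is done (see the usage sketch at
// the end of this file).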

void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
  for (const auto& scaler_file : cfg_.scalersFiles) {
    // Parse the scaler configuration
    std::vector<ScalerConfiguration> features;
    std::ifstream inputfile_scaler{edm::FileInPath(scaler_file).fullPath()};
    int ninputs = 0;
    if (inputfile_scaler.fail()) {
      throw cms::Exception("MissingFile") << "Scaler file for PFid DNN not found";
    } else {
      // Read the scaling type and the two scaling parameters for each variable
      float par1, par2;
      std::string varName, type_str;
      uint type;
      while (inputfile_scaler >> varName >> type_str >> par1 >> par2) {
        if (type_str == "stdscale")
          type = 1;
        else if (type_str == "minmax")
          type = 2;
        else if (type_str == "custom1")  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
          type = 3;
        else
          type = 0;
        features.push_back(ScalerConfiguration{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
        // Protection against a mismatch between the requested variables and the available ones
        auto match = std::find(availableVars.begin(), availableVars.end(), varName);
        if (match == std::end(availableVars)) {
          throw cms::Exception("MissingVariable")
              << "Requested variable (" << varName << ") not available among the DNN inputs";
        }
        ninputs += 1;
      }
    }
    inputfile_scaler.close();
    featuresMap_.push_back(features);
    nInputs_.push_back(ninputs);
  }
}
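// Illustrative layout of a scaler file as expected by the parser above: one whitespace-separated
// record per input variable, in the form "varName type par1 par2". The variable names and numbers
// below are made-up placeholders, not taken from an actual RecoEgamma data file:
//
//   sigmaIetaIeta  stdscale  0.0103  0.0057   -> scaled as (x - par1) / par2
//   etaWidth       minmax    0.0000  0.0500   -> scaled as (x - par1) / (par2 - par1)
//   hadTowOverEm   custom1   0.0000  0.1500   -> scaled as 2*(x - par1)/(par2 - par1) - 1
//   rho            none      0.0     0.0      -> any other keyword leaves the value unscaled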

std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
    const std::map<std::string, float>& variables) const {
  // Call the modelSelector function on the variables map to get
  // the modelIndex to be used for the current candidate
  const auto modelIndex = modelSelector_(variables);
  std::vector<float> inputs;
  // Loop on the list of requested variables and scaling values for the specific modelIndex.
  // Different types of scaling are available: 0=no scaling, 1=standard scaler, 2=min-max, 3=min-max to [-1,1]
  for (auto& [varName, type, par1, par2] : featuresMap_[modelIndex]) {
    if (type == 1)  // Standard scaling
      inputs.push_back((variables.at(varName) - par1) / par2);
    else if (type == 2)  // MinMax
      inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
    else if (type == 3)  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
      inputs.push_back(2 * (variables.at(varName) - par1) / (par2 - par1) - 1.);
    else {
      inputs.push_back(variables.at(varName));  // Do nothing on the variable
    }
    // The protection against a mismatch between the requested variables and the available ones
    // was already applied when the scaler configuration was loaded --> here we know that the variables are available
  }
  return std::make_pair(modelIndex, inputs);
}
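// Worked example of the scalings above, using the placeholder numbers from the illustrative
// scaler file shown earlier:
//   stdscale  (par1=0.0103, par2=0.0057):  x=0.0160 -> (0.0160 - 0.0103) / 0.0057           = 1.0
//   minmax    (par1=0.0,    par2=0.05):    x=0.0250 -> (0.025 - 0.0) / (0.05 - 0.0)          = 0.5
//   custom1   (par1=0.0,    par2=0.15):    x=0.0750 -> 2 * (0.075 - 0.0) / (0.15 - 0.0) - 1  = 0.0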

std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
    const std::vector<std::map<std::string, float>>& candidates,
    const std::vector<tensorflow::Session*>& sessions) const {
  /*
     Evaluate the PFID DNN for all the electrons/photons.
     There are nModels_ models and each candidate is assigned a modelIndex --> we need to build
     nModels_ input tensors to evaluate the DNNs with batching.

     1) Get all the variables for each candidate (vector<map<string, float>>)
     2) Scale the inputs and select the variables for each model
     3) Prepare the input tensors for the models
     4) Run the models and get the output for each candidate
     5) Sort the outputs by candidate index
     6) Return the DNN outputs along with the index of the model used for each candidate
  */
  size_t nCandidates = candidates.size();
  std::vector<std::vector<uint>> indexMap(nModels_);  // for each model, the list of candidate indices is saved
  std::vector<std::vector<float>> inputsVectors(nCandidates);
  std::vector<uint> counts(nModels_);

  LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";

  uint icand = 0;
  for (auto& candidate : candidates) {
    LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
    const auto& [model_index, inputs] = getScaledInputs(candidate);
    counts[model_index] += 1;
    indexMap[model_index].push_back(icand);
    inputsVectors[icand] = inputs;
    icand++;
  }

  // Prepare one input tensor for each model
  std::vector<tensorflow::Tensor> input_tensors(nModels_);
  // Pointers for efficiently filling the input tensors
  std::vector<float*> input_tensors_pointer(nModels_);
  for (size_t i = 0; i < nModels_; i++) {
    LogDebug("EgammaDNNHelper") << "Initializing TF input " << i << " with rows:" << counts[i]
                                << " and cols:" << nInputs_[i];
    input_tensors[i] = tensorflow::Tensor{tensorflow::DT_FLOAT, {counts[i], nInputs_[i]}};
    input_tensors_pointer[i] = input_tensors[i].flat<float>().data();
  }

  // Fill the input tensors
  for (size_t m = 0; m < nModels_; m++) {
    LogDebug("EgammaDNNHelper") << "Loading TF input tensor for model: " << m;
    float* T = input_tensors_pointer[m];
    for (size_t cand_index : indexMap[m]) {
      for (size_t k = 0; k < nInputs_[m]; k++, T++) {  // Note: the input tensor pointer is incremented after each write
        *T = inputsVectors[cand_index][k];
      }
    }
  }

  // Define the output and run the models
  // The intermediate output format is [(cand_index, (model_index, outputs)), ...]
  std::vector<std::pair<uint, std::pair<uint, std::vector<float>>>> outputs;
  // Run all the models
  for (size_t m = 0; m < nModels_; m++) {
    if (counts[m] == 0)
      continue;  // Skip models without inputs
    std::vector<tensorflow::Tensor> output;
    LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << " objects";
    tensorflow::run(sessions[m], {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
    // Get the output and save the cfg_.outputDim[m] numbers along with the candidate index
    const auto& r = output[0].tensor<float, 2>();
    // Iterate on the list of elements in the batch --> many electrons/photons
    LogDebug("EgammaDNNHelper") << "Model " << m << " has " << cfg_.outputDim[m] << " nodes!";
    for (uint b = 0; b < counts[m]; b++) {
      std::vector<float> result(cfg_.outputDim[m]);
      for (size_t k = 0; k < cfg_.outputDim[m]; k++) {
        result[k] = r(b, k);
        LogDebug("EgammaDNNHelper") << "For Object " << b + 1 << " : Node " << k + 1 << " score = " << r(b, k);
      }
      // Get the original index of the candidate in the input order
      const auto cand_index = indexMap[m][b];
      outputs.push_back(std::make_pair(cand_index, std::make_pair(m, result)));
    }
  }
  // Now we just have to re-order the outputs by candidate index
  std::sort(outputs.begin(), outputs.end());
  std::vector<std::pair<uint, std::vector<float>>> final_outputs(outputs.size());
  std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](auto a) { return a.second; });

  return final_outputs;
}
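
A minimal usage sketch follows (not part of this file), assuming the DNNConfiguration and ModelSelector types declared in EgammaDNNHelper.h and the standard PhysicsTools/TensorFlow session interface. The tensor names, data file paths, and variable names are placeholders chosen for illustration only.

#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"
#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"

#include <map>
#include <string>
#include <vector>

void exampleUsage() {
  egammaTools::DNNConfiguration cfg;
  cfg.inputTensorName = "input_layer";              // placeholder tensor names
  cfg.outputTensorName = "output_layer/Softmax";
  cfg.modelsFiles = {"SomePackage/data/model.pb"};  // placeholder file paths
  cfg.scalersFiles = {"SomePackage/data/scaler.txt"};
  cfg.outputDim = {5};

  // Trivial model selector: always pick model 0
  egammaTools::ModelSelector selector = [](const std::map<std::string, float>&) { return 0u; };

  // The available variables must cover every variable listed in the scaler file
  std::vector<std::string> availableVars = {"pt", "eta"};
  egammaTools::EgammaDNNHelper helper(cfg, selector, availableVars);

  // One TF session per model, owned by the caller
  auto sessions = helper.getSessions();

  // One map of input variables per candidate
  std::vector<std::map<std::string, float>> candidates = {{{"pt", 35.f}, {"eta", 1.2f}}};
  const auto outputs = helper.evaluate(candidates, sessions);  // [(modelIndex, scores), ...]

  for (auto& session : sessions)
    tensorflow::closeSession(session);
}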