EgammaDNNHelper.cc
#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/FileInPath.h"
#include "FWCore/Utilities/interface/Exception.h"

#include <algorithm>
#include <iostream>
#include <fstream>

using namespace egammaTools;

EgammaDNNHelper::EgammaDNNHelper(const DNNConfiguration& cfg,
                                 const ModelSelector& modelSelector,
                                 const std::vector<std::string>& availableVars)
    : cfg_(cfg), modelSelector_(modelSelector), nModels_(cfg_.modelsFiles.size()), graphDefs_(cfg_.modelsFiles.size()) {
  initTensorFlowGraphs();
  initScalerFiles(availableVars);
}

void EgammaDNNHelper::initTensorFlowGraphs() {
  // Load the graph definitions
  LogDebug("EgammaDNNHelper") << "Loading " << nModels_ << " graphs";
  size_t i = 0;
  for (const auto& model_file : cfg_.modelsFiles) {
    graphDefs_[i] =
        std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(model_file).fullPath()));
    i++;
  }
}

std::vector<tensorflow::Session*> EgammaDNNHelper::getSessions() const {
  std::vector<tensorflow::Session*> sessions;
  LogDebug("EgammaDNNHelper") << "Starting " << nModels_ << " TF sessions";
  for (const auto& graphDef : graphDefs_) {
    sessions.push_back(tensorflow::createSession(graphDef.get()));
  }
  LogDebug("EgammaDNNHelper") << "TF sessions started";
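  // The helper does not retain these pointers: ownership of the sessions is
  // handed to the caller, which is expected to close them when done
  // (e.g. with tensorflow::closeSession).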
  return sessions;
}

void EgammaDNNHelper::initScalerFiles(const std::vector<std::string>& availableVars) {
  for (const auto& scaler_file : cfg_.scalersFiles) {
    // Parse scaler configuration
    std::vector<ScalerConfiguration> features;
    std::ifstream inputfile_scaler{edm::FileInPath(scaler_file).fullPath()};
    int ninputs = 0;
    if (inputfile_scaler.fail()) {
      throw cms::Exception("MissingFile") << "Scaler file for PFid DNN not found";
    } else {
      // Now read the mean and scale factors for each variable
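      // Each line of the scaler file holds four whitespace-separated fields:
      //   <varName> <type> <par1> <par2>
      // e.g. a (hypothetical) line could read: sigmaIetaIeta stdscale 0.0105 0.0032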
      float par1, par2;
      std::string varName, type_str;
      uint type;
      while (inputfile_scaler >> varName >> type_str >> par1 >> par2) {
        if (type_str == "stdscale")
          type = 1;
        else if (type_str == "minmax")
          type = 2;
        else if (type_str == "custom1")  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
          type = 3;
        else
          type = 0;
        features.push_back(ScalerConfiguration{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
        // Protection against a mismatch between the requested variables and the available ones
        auto match = std::find(availableVars.begin(), availableVars.end(), varName);
        if (match == std::end(availableVars)) {
          throw cms::Exception("MissingVariable")
              << "Requested variable (" << varName << ") not available among the DNN inputs";
        }
        ninputs += 1;
      }
    }
    inputfile_scaler.close();
    featuresMap_.push_back(features);
    nInputs_.push_back(ninputs);
  }
}

std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
    const std::map<std::string, float>& variables) const {
  // Call the modelSelector function, passing the variables map, to get
  // the modelIndex to be used for the current candidate
  const auto modelIndex = modelSelector_(variables);
  std::vector<float> inputs;
  // Loop on the list of requested variables and scaling parameters for the specific modelIndex.
  // Different types of scaling are available: 0 = no scaling, 1 = standard scaler,
  // 2 = min-max, 3 = custom min-max mapped to [-1, 1]
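  // In all cases par1 and par2 are the two numbers read from the scaler file:
  // the mean and standard deviation for the standard scaler, and the minimum
  // and maximum of the training distribution for the min-max based scalings.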
  for (auto& [varName, type, par1, par2] : featuresMap_[modelIndex]) {
    if (type == 1)  // Standard scaling
      inputs.push_back((variables.at(varName) - par1) / par2);
    else if (type == 2)  // MinMax
      inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
    else if (type == 3)  // 2*((X_train - minValues)/(MaxMinusMin)) - 1.0
      inputs.push_back(2 * (variables.at(varName) - par1) / (par2 - par1) - 1.);
    else {
      inputs.push_back(variables.at(varName));  // Do nothing on the variable
    }
    // The protection against a mismatch between requested and available variables is applied
    // when the scaler configuration is loaded --> here we know that the variables are available
  }
  return std::make_pair(modelIndex, inputs);
}

std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
    const std::vector<std::map<std::string, float>>& candidates,
    const std::vector<tensorflow::Session*>& sessions) const {
  /*
  Evaluate the PFID DNN for all the electrons/photons.
  nModels_ models are available; each candidate is assigned to one of them through the
  modelSelector --> we need to build nModels_ input tensors to evaluate the DNNs with batching.

  1) Get all the variables for each candidate: vector<map<string, float>>
  2) Scale the inputs and select the variables for each model
  3) Prepare the input tensors for the models
  4) Run the models and get the output for each candidate
  5) Sort the outputs by candidate index
  6) Return the DNN outputs along with the index of the model used for each candidate

  */
  size_t nCandidates = candidates.size();
  std::vector<std::vector<uint>> indexMap(nModels_);  // for each model, the list of candidate indices is saved
  std::vector<std::vector<float>> inputsVectors(nCandidates);
  std::vector<uint> counts(nModels_);

  LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";

  uint icand = 0;
  for (auto& candidate : candidates) {
    LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
    const auto& [model_index, inputs] = getScaledInputs(candidate);
    counts[model_index] += 1;
    indexMap[model_index].push_back(icand);
    inputsVectors[icand] = inputs;
    icand++;
  }

  // Prepare one input tensor for each model
  std::vector<tensorflow::Tensor> input_tensors(nModels_);
  // Pointers for filling the input tensors efficiently
  std::vector<float*> input_tensors_pointer(nModels_);
  for (size_t i = 0; i < nModels_; i++) {
    LogDebug("EgammaDNNHelper") << "Initializing TF input " << i << " with rows: " << counts[i]
                                << " and cols: " << nInputs_[i];
    input_tensors[i] = tensorflow::Tensor{tensorflow::DT_FLOAT, {counts[i], nInputs_[i]}};
    input_tensors_pointer[i] = input_tensors[i].flat<float>().data();
  }

  // Filling the input tensors
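  // Each tensor is filled row by row: for model m, row b holds the nInputs_[m]
  // scaled features of the b-th candidate assigned to that model, in the same
  // order as indexMap[m].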
  for (size_t m = 0; m < nModels_; m++) {
    LogDebug("EgammaDNNHelper") << "Loading TF input tensor for model: " << m;
    float* T = input_tensors_pointer[m];
    for (size_t cand_index : indexMap[m]) {
      for (size_t k = 0; k < nInputs_[m]; k++, T++) {  // Note: the input tensor pointer is incremented
        *T = inputsVectors[cand_index][k];
      }
    }
  }

  // Define the output and run
  // The intermediate output is [(cand_index, (model_index, outputs)), ...]
  std::vector<std::pair<uint, std::pair<uint, std::vector<float>>>> outputs;
  // Run all the models
  for (size_t m = 0; m < nModels_; m++) {
    if (counts[m] == 0)
      continue;  // Skip models without inputs
    std::vector<tensorflow::Tensor> output;
    LogDebug("EgammaDNNHelper") << "Run model: " << m << " with " << counts[m] << " objects";
    tensorflow::run(sessions[m], {{cfg_.inputTensorName, input_tensors[m]}}, {cfg_.outputTensorName}, &output);
    // Get the output and save the cfg_.outputDim[m] numbers along with the candidate index
    const auto& r = output[0].tensor<float, 2>();
    // Iterate on the list of elements in the batch --> many electrons/photons
    LogDebug("EgammaDNNHelper") << "Model " << m << " has " << cfg_.outputDim[m] << " nodes!";
    for (uint b = 0; b < counts[m]; b++) {
      std::vector<float> result(cfg_.outputDim[m]);
      for (size_t k = 0; k < cfg_.outputDim[m]; k++) {
        result[k] = r(b, k);
        LogDebug("EgammaDNNHelper") << "For object " << b + 1 << " : node " << k + 1 << " score = " << r(b, k);
      }
      // Get the index of the candidate in the original order
      const auto cand_index = indexMap[m][b];
      outputs.push_back(std::make_pair(cand_index, std::make_pair(m, result)));
    }
  }
  // Now we just have to re-order the outputs
  std::sort(outputs.begin(), outputs.end());
  std::vector<std::pair<uint, std::vector<float>>> final_outputs(outputs.size());
  std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](const auto& a) { return a.second; });

  return final_outputs;
}
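
For reference, a minimal usage sketch of the helper (assuming DNNConfiguration is the struct declared in EgammaDNNHelper.h; the file paths, tensor names, variable names and the always-pick-model-0 selector are purely illustrative placeholders):

// Sketch only, not part of EgammaDNNHelper.cc.
#include "RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h"

void exampleUsage() {
  egammaTools::DNNConfiguration cfg;
  cfg.inputTensorName = "input";              // hypothetical tensor names
  cfg.outputTensorName = "output";
  cfg.modelsFiles = {"path/to/model.pb"};     // one graph ...
  cfg.scalersFiles = {"path/to/scaler.txt"};  // ... with its scaler file
  cfg.outputDim = {1};                        // one output node per candidate

  // Selector that always picks the first (and only) model.
  egammaTools::EgammaDNNHelper helper(
      cfg, [](const std::map<std::string, float>&) -> uint { return 0; }, {"pt", "eta"});

  auto sessions = helper.getSessions();
  std::vector<std::map<std::string, float>> candidates = {{{"pt", 25.f}, {"eta", 1.1f}}};
  const auto results = helper.evaluate(candidates, sessions);  // [(modelIndex, scores), ...]
}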