CMS 3D CMS Logo

DeepSCGraphEvaluation.cc
Go to the documentation of this file.
4 #include "TMath.h"
5 #include <iostream>
6 #include <fstream>
7 using namespace reco;
8 
9 const std::vector<std::string> DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy",
10  "cl_et",
11  "cl_eta",
12  "cl_phi",
13  "cl_ieta",
14  "cl_iphi",
15  "cl_iz",
16  "cl_seed_dEta",
17  "cl_seed_dPhi",
18  "cl_seed_dEnergy",
19  "cl_seed_dEt",
20  "cl_nxtals"};
21 const std::vector<std::string> DeepSCGraphEvaluation::availableWindowInputs = {
22  "max_cl_energy", "max_cl_et", "max_cl_eta", "max_cl_phi", "max_cl_ieta", "max_cl_iphi",
23  "max_cl_iz", "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals",
24  "min_cl_energy", "min_cl_et", "min_cl_eta", "min_cl_phi", "min_cl_ieta", "min_cl_iphi",
25  "min_cl_iz", "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals",
26  "avg_cl_energy", "avg_cl_et", "avg_cl_eta", "avg_cl_phi", "avg_cl_ieta", "avg_cl_iphi",
27  "avg_cl_iz", "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"};
28 const std::vector<std::string> DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"};
29 
31  // Init TF graph and session objects
33  // Init scaler configs
37  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
38  << "parameters in the scaler file.";
39  }
43  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
44  << "parameters in the scaler file.";
45  }
47  if (inputFeaturesHits.size() != cfg_.nHitsFeatures) {
48  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
49  << "parameters in the scaler file.";
50  }
51 }
52 
54  if (session_ != nullptr)
56 }
57 
59  // load the graph definition
60  LogDebug("DeepSCGraphEvaluation") << "Loading graph";
61  graphDef_ =
62  std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath()));
63  LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions";
65  LogDebug("DeepSCGraphEvaluation") << "TF ready";
66 }
67 
69  std::string file, const std::vector<std::string>& availableInputs) const {
71  LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath();
72  std::ifstream inputfile{edm::FileInPath(file).fullPath()};
73  if (inputfile.fail()) {
74  throw cms::Exception("MissingFile") << "Input features config file not found: " << file;
75  } else {
76  // Now read mean, scale factors for each variable
77  float par1, par2;
78  std::string varName, type_str;
80  while (inputfile >> varName >> type_str >> par1 >> par2) {
81  if (type_str == "MeanRms")
83  else if (type_str == "MinMax")
85  else
86  type = DeepSCInputs::ScalerType::None; //do nothing
87  features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
88  // Protection for mismatch between requested variables and the available ones
89  auto match = std::find(availableInputs.begin(), availableInputs.end(), varName);
90  if (match == std::end(availableInputs)) {
91  throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs";
92  }
93  LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str;
94  }
95  }
96  return features;
97 }
98 
100  const DeepSCInputs::InputConfigs& config) const {
101  std::vector<float> inputs;
102  inputs.reserve(config.size());
103  // Loop on the list of requested variables and scaling values
104  // Different type of scaling are available: 0=no scaling, 1=standard scaler, 2=minmax
105  for (auto& [varName, type, par1, par2] : config) {
107  inputs.push_back((variables.at(varName) - par1) / par2);
109  inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
110  else if (type == DeepSCInputs::ScalerType::None) {
111  inputs.push_back(variables.at(varName)); // Do nothing on the variable
112  }
113  //Protection for mismatch between requested variables and the available ones
114  // have been added when the scaler config are loaded --> here we know that the variables are available
115  }
116  return inputs;
117 }
118 
119 std::vector<std::vector<float>> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const {
120  LogDebug("DeepSCGraphEvaluation") << "Starting evaluation";
121 
122  // Final output
123  std::vector<std::vector<float>> outputs_clustering;
124 
125  // We need to split the total inputs in N batches of size batchSize (configured in the producer)
126  // being careful with the last batch which will have less than batchSize elements
127  size_t nInputs = inputs.clustersX.size();
128  uint iB = -1; // batch index
129  while (nInputs > 0) {
130  iB++; // go to next batch
131  size_t nItems;
132  if (nInputs >= cfg_.batchSize) {
133  nItems = cfg_.batchSize;
134  nInputs -= cfg_.batchSize;
135  } else {
136  nItems = nInputs;
137  nInputs = 0;
138  }
139 
140  // Inputs
141  tensorflow::Tensor clsX_{tensorflow::DT_FLOAT,
142  {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}};
143  tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.nWindowFeatures}};
144  tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT,
145  {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}};
146  tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.maxNClusters, 1}};
147  tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems)}};
148 
149  // Look on batch dim
150  for (size_t b = 0; b < nItems; b++) {
151  const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b];
152  // Loop on clusters
153  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
154  // Loop on features
155  for (size_t z = 0; z < cfg_.nClusterFeatures; z++) {
156  if (k < cls_data.size()) {
157  clsX_.tensor<float, 3>()(b, k, z) = float(cls_data[k][z]);
158  } else {
159  clsX_.tensor<float, 3>()(b, k, z) = 0.;
160  }
161  }
162  }
163  }
164 
165  // Look on batch dim
166  for (size_t b = 0; b < nItems; b++) {
167  const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b];
168  // Loop on features
169  for (size_t k = 0; k < cfg_.nWindowFeatures; k++) {
170  windX_.matrix<float>()(b, k) = float(wind_features[k]);
171  }
172  }
173 
174  // Look on batch dim
175  for (size_t b = 0; b < nItems; b++) {
176  const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b];
177  size_t ncls_in_window = hits_data.size();
178  // Loop on clusters
179  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
180  // Check padding
181  size_t nhits_in_cluster;
182  if (k < ncls_in_window)
183  nhits_in_cluster = hits_data[k].size();
184  else
185  nhits_in_cluster = 0;
186 
187  // Loop on hits
188  for (size_t j = 0; j < cfg_.maxNRechits; j++) {
189  // Check the number of clusters and hits for padding
190  bool ok = j < nhits_in_cluster;
191  // Loop on rechits features
192  for (size_t z = 0; z < cfg_.nHitsFeatures; z++) {
193  if (ok)
194  hitsX_.tensor<float, 4>()(b, k, j, z) = float(hits_data[k][j][z]);
195  else
196  hitsX_.tensor<float, 4>()(b, k, j, z) = 0.;
197  }
198  }
199  }
200  }
201 
202  // Look on batch dim
203  for (size_t b = 0; b < nItems; b++) {
204  const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b];
205  // Loop on clusters
206  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
207  if (k < isSeed_data.size()) {
208  isSeedX_.tensor<float, 3>()(b, k, 0) = float(isSeed_data[k]);
209  } else {
210  isSeedX_.tensor<float, 3>()(b, k, 0) = 0.;
211  }
212  }
213  }
214 
215  for (size_t b = 0; b < nItems; b++) {
216  nClsSize_.vec<float>()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size());
217  }
218 
219  std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict = {
220  {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}};
221 
222  // prepare tensorflow outputs
223  std::vector<tensorflow::Tensor> outputs_tf;
224  // // Define the output and run
225  // // Run the models
226  LogDebug("DeepSCGraphEvaluation") << "Run model";
227  tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf);
228  // Reading the 1st output: clustering probability
229  const auto& y_cl = outputs_tf[0].tensor<float, 3>();
230  // Iterate on the clusters for each window
231  for (size_t b = 0; b < nItems; b++) {
232  uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size();
233  std::vector<float> cl_output(ncls);
234  for (size_t iC = 0; iC < ncls; iC++) {
235  if (iC < cfg_.maxNClusters) {
236  float y = y_cl(b, iC, 0);
237  // Applying sigmoid to logit
238  cl_output[iC] = 1 / (1 + std::exp(-y));
239  } else {
240  // The number of clusters is over the padding max dim
241  cl_output[iC] = 0;
242  }
243  }
244  outputs_clustering.push_back(cl_output);
245  }
246  }
247 
248  return outputs_clustering;
249 }
std::vector< float > getScaledInputs(const DeepSCInputs::FeaturesMap &variables, const DeepSCInputs::InputConfigs &config) const
const DeepSCConfiguration cfg_
std::string fullPath() const
Definition: FileInPath.cc:161
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:119
DeepSCGraphEvaluation(const DeepSCConfiguration &)
Definition: config.py:1
DeepSCInputs::InputConfigs inputFeaturesClusters
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
static const std::vector< std::string > availableClusterInputs
static const std::vector< std::string > availableWindowInputs
DeepSCInputs::InputConfigs inputFeaturesWindows
std::map< std::string, double > FeaturesMap
std::vector< InputConfig > InputConfigs
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
DeepSCInputs::InputConfigs readInputFeaturesConfig(std::string file, const std::vector< std::string > &availableInputs) const
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:271
bool closeSession(Session *&session)
Definition: TensorFlow.cc:233
DeepSCInputs::InputConfigs inputFeaturesHits
static const std::vector< std::string > availableHitsInputs
Session * createSession()
Definition: TensorFlow.cc:136
double b
Definition: hdecay.h:120
fixed size matrix
std::unique_ptr< tensorflow::GraphDef > graphDef_
tensorflow::Session * session_
std::vector< std::vector< float > > evaluate(const DeepSCInputs::Inputs &inputs) const
#define LogDebug(id)