CMS 3D CMS Logo

DeepSCGraphEvaluation.cc
Go to the documentation of this file.
4 #include "TMath.h"
5 #include <iostream>
6 #include <fstream>
7 using namespace reco;
8 
9 const std::vector<std::string> DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy",
10  "cl_et",
11  "cl_eta",
12  "cl_phi",
13  "cl_ieta",
14  "cl_iphi",
15  "cl_iz",
16  "cl_seed_dEta",
17  "cl_seed_dPhi",
18  "cl_seed_dEnergy",
19  "cl_seed_dEt",
20  "cl_nxtals"};
21 const std::vector<std::string> DeepSCGraphEvaluation::availableWindowInputs = {
22  "max_cl_energy", "max_cl_et", "max_cl_eta", "max_cl_phi", "max_cl_ieta", "max_cl_iphi",
23  "max_cl_iz", "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals",
24  "min_cl_energy", "min_cl_et", "min_cl_eta", "min_cl_phi", "min_cl_ieta", "min_cl_iphi",
25  "min_cl_iz", "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals",
26  "avg_cl_energy", "avg_cl_et", "avg_cl_eta", "avg_cl_phi", "avg_cl_ieta", "avg_cl_iphi",
27  "avg_cl_iz", "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"};
28 const std::vector<std::string> DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"};
29 
32  // Init TF graph and session objects
34  // Init scaler configs
38  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
39  << "parameters in the scaler file.";
40  }
44  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
45  << "parameters in the scaler file.";
46  }
48  if (inputFeaturesHits.size() != cfg_.nHitsFeatures) {
49  throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and "
50  << "parameters in the scaler file.";
51  }
52 }
53 
55  if (session_ != nullptr)
57 }
58 
60  // load the graph definition
61  LogDebug("DeepSCGraphEvaluation") << "Loading graph";
62  graphDef_ =
63  std::unique_ptr<tensorflow::GraphDef>(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath()));
64  LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions";
66  LogDebug("DeepSCGraphEvaluation") << "TF ready";
67 }
68 
70  std::string file, const std::vector<std::string>& availableInputs) const {
72  LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath();
73  std::ifstream inputfile{edm::FileInPath(file).fullPath()};
74  if (inputfile.fail()) {
75  throw cms::Exception("MissingFile") << "Input features config file not found: " << file;
76  } else {
77  // Now read mean, scale factors for each variable
78  float par1, par2;
79  std::string varName, type_str;
81  while (inputfile >> varName >> type_str >> par1 >> par2) {
82  if (type_str == "MeanRms")
84  else if (type_str == "MinMax")
86  else
87  type = DeepSCInputs::ScalerType::None; //do nothing
88  features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2});
89  // Protection for mismatch between requested variables and the available ones
90  auto match = std::find(availableInputs.begin(), availableInputs.end(), varName);
91  if (match == std::end(availableInputs)) {
92  throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs";
93  }
94  LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str;
95  }
96  }
97  return features;
98 }
99 
101  const DeepSCInputs::InputConfigs& config) const {
102  std::vector<float> inputs;
103  inputs.reserve(config.size());
104  // Loop on the list of requested variables and scaling values
105  // Different type of scaling are available: 0=no scaling, 1=standard scaler, 2=minmax
106  for (auto& [varName, type, par1, par2] : config) {
108  inputs.push_back((variables.at(varName) - par1) / par2);
110  inputs.push_back((variables.at(varName) - par1) / (par2 - par1));
111  else if (type == DeepSCInputs::ScalerType::None) {
112  inputs.push_back(variables.at(varName)); // Do nothing on the variable
113  }
114  //Protection for mismatch between requested variables and the available ones
115  // have been added when the scaler config are loaded --> here we know that the variables are available
116  }
117  return inputs;
118 }
119 
120 std::vector<std::vector<float>> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const {
121  LogDebug("DeepSCGraphEvaluation") << "Starting evaluation";
122 
123  // Final output
124  std::vector<std::vector<float>> outputs_clustering;
125 
126  // We need to split the total inputs in N batches of size batchSize (configured in the producer)
127  // being careful with the last batch which will have less than batchSize elements
128  size_t nInputs = inputs.clustersX.size();
129  uint iB = -1; // batch index
130  while (nInputs > 0) {
131  iB++; // go to next batch
132  size_t nItems;
133  if (nInputs >= cfg_.batchSize) {
134  nItems = cfg_.batchSize;
135  nInputs -= cfg_.batchSize;
136  } else {
137  nItems = nInputs;
138  nInputs = 0;
139  }
140 
141  // Inputs
142  tensorflow::Tensor clsX_{tensorflow::DT_FLOAT,
143  {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}};
144  tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.nWindowFeatures}};
145  tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT,
146  {static_cast<long int>(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}};
147  tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems), cfg_.maxNClusters, 1}};
148  tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast<long int>(nItems)}};
149 
150  // Look on batch dim
151  for (size_t b = 0; b < nItems; b++) {
152  const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b];
153  // Loop on clusters
154  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
155  // Loop on features
156  for (size_t z = 0; z < cfg_.nClusterFeatures; z++) {
157  if (k < cls_data.size()) {
158  clsX_.tensor<float, 3>()(b, k, z) = float(cls_data[k][z]);
159  } else {
160  clsX_.tensor<float, 3>()(b, k, z) = 0.;
161  }
162  }
163  }
164  }
165 
166  // Look on batch dim
167  for (size_t b = 0; b < nItems; b++) {
168  const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b];
169  // Loop on features
170  for (size_t k = 0; k < cfg_.nWindowFeatures; k++) {
171  windX_.matrix<float>()(b, k) = float(wind_features[k]);
172  }
173  }
174 
175  // Look on batch dim
176  for (size_t b = 0; b < nItems; b++) {
177  const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b];
178  size_t ncls_in_window = hits_data.size();
179  // Loop on clusters
180  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
181  // Check padding
182  size_t nhits_in_cluster;
183  if (k < ncls_in_window)
184  nhits_in_cluster = hits_data[k].size();
185  else
186  nhits_in_cluster = 0;
187 
188  // Loop on hits
189  for (size_t j = 0; j < cfg_.maxNRechits; j++) {
190  // Check the number of clusters and hits for padding
191  bool ok = j < nhits_in_cluster;
192  // Loop on rechits features
193  for (size_t z = 0; z < cfg_.nHitsFeatures; z++) {
194  if (ok)
195  hitsX_.tensor<float, 4>()(b, k, j, z) = float(hits_data[k][j][z]);
196  else
197  hitsX_.tensor<float, 4>()(b, k, j, z) = 0.;
198  }
199  }
200  }
201  }
202 
203  // Look on batch dim
204  for (size_t b = 0; b < nItems; b++) {
205  const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b];
206  // Loop on clusters
207  for (size_t k = 0; k < cfg_.maxNClusters; k++) {
208  if (k < isSeed_data.size()) {
209  isSeedX_.tensor<float, 3>()(b, k, 0) = float(isSeed_data[k]);
210  } else {
211  isSeedX_.tensor<float, 3>()(b, k, 0) = 0.;
212  }
213  }
214  }
215 
216  for (size_t b = 0; b < nItems; b++) {
217  nClsSize_.vec<float>()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size());
218  }
219 
220  std::vector<std::pair<std::string, tensorflow::Tensor>> feed_dict = {
221  {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}};
222 
223  // prepare tensorflow outputs
224  std::vector<tensorflow::Tensor> outputs_tf;
225  // // Define the output and run
226  // // Run the models
227  LogDebug("DeepSCGraphEvaluation") << "Run model";
228  tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf);
229  // Reading the 1st output: clustering probability
230  const auto& y_cl = outputs_tf[0].tensor<float, 3>();
231  // Iterate on the clusters for each window
232  for (size_t b = 0; b < nItems; b++) {
233  uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size();
234  std::vector<float> cl_output(ncls);
235  for (size_t iC = 0; iC < ncls; iC++) {
236  if (iC < cfg_.maxNClusters) {
237  float y = y_cl(b, iC, 0);
238  // Applying sigmoid to logit
239  cl_output[iC] = 1 / (1 + std::exp(-y));
240  } else {
241  // The number of clusters is over the padding max dim
242  cl_output[iC] = 0;
243  }
244  }
245  outputs_clustering.push_back(cl_output);
246  }
247  }
248 
249  return outputs_clustering;
250 }
std::vector< float > getScaledInputs(const DeepSCInputs::FeaturesMap &variables, const DeepSCInputs::InputConfigs &config) const
const DeepSCConfiguration cfg_
std::string fullPath() const
Definition: FileInPath.cc:161
GraphDef * loadGraphDef(const std::string &pbFile)
Definition: TensorFlow.cc:129
DeepSCGraphEvaluation(const DeepSCConfiguration &)
Definition: config.py:1
DeepSCInputs::InputConfigs inputFeaturesClusters
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19
static const std::vector< std::string > availableClusterInputs
static const std::vector< std::string > availableWindowInputs
DeepSCInputs::InputConfigs inputFeaturesWindows
std::map< std::string, double > FeaturesMap
std::vector< InputConfig > InputConfigs
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
DeepSCInputs::InputConfigs readInputFeaturesConfig(std::string file, const std::vector< std::string > &availableInputs) const
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:268
bool closeSession(Session *&session)
Definition: TensorFlow.cc:243
DeepSCInputs::InputConfigs inputFeaturesHits
static const std::vector< std::string > availableHitsInputs
Session * createSession()
Definition: TensorFlow.cc:146
void setLogging(const std::string &level="3")
Definition: TensorFlow.cc:90
double b
Definition: hdecay.h:120
fixed size matrix
std::unique_ptr< tensorflow::GraphDef > graphDef_
tensorflow::Session * session_
std::vector< std::vector< float > > evaluate(const DeepSCInputs::Inputs &inputs) const
#define LogDebug(id)