CMS 3D CMS Logo

PatternRecognitionbyCA.cc
Go to the documentation of this file.
1 // Author: Felice Pantaleo, Marco Rovere - felice.pantaleo@cern.ch, marco.rovere@cern.ch
2 // Date: 11/2018
3 #include <algorithm>
4 #include <set>
5 #include <vector>
6 
10 #include "HGCGraph.h"
11 
12 using namespace ticl;
13 
15  : PatternRecognitionAlgoBase(conf, cache),
16  theGraph_(std::make_unique<HGCGraph>()),
17  out_in_dfs_(conf.getParameter<bool>("out_in_dfs")),
18  max_out_in_hops_(conf.getParameter<int>("max_out_in_hops")),
19  min_cos_theta_(conf.getParameter<double>("min_cos_theta")),
20  min_cos_pointing_(conf.getParameter<double>("min_cos_pointing")),
21  missing_layers_(conf.getParameter<int>("missing_layers")),
22  min_clusters_per_ntuplet_(conf.getParameter<int>("min_clusters_per_ntuplet")),
23  max_delta_time_(conf.getParameter<double>("max_delta_time")),
24  eidInputName_(conf.getParameter<std::string>("eid_input_name")),
25  eidOutputNameEnergy_(conf.getParameter<std::string>("eid_output_name_energy")),
26  eidOutputNameId_(conf.getParameter<std::string>("eid_output_name_id")),
27  eidMinClusterEnergy_(conf.getParameter<double>("eid_min_cluster_energy")),
28  eidNLayers_(conf.getParameter<int>("eid_n_layers")),
29  eidNClusters_(conf.getParameter<int>("eid_n_clusters")),
30  eidSession_(nullptr) {
31  // mount the tensorflow graph onto the session when set
32  const TrackstersCache *trackstersCache = dynamic_cast<const TrackstersCache *>(cache);
33  if (trackstersCache == nullptr || trackstersCache->eidGraphDef == nullptr) {
34  throw cms::Exception("MissingGraphDef")
35  << "PatternRecognitionbyCA received an empty graph definition from the global cache";
36  }
38 }
39 
41 
43  std::vector<Trackster> &result) {
44  rhtools_.getEventSetup(input.es);
45 
46  theGraph_->setVerbosity(algo_verbosity_);
47  theGraph_->clear();
48  if (algo_verbosity_ > None) {
49  LogDebug("HGCPatterRecoByCA") << "Making Tracksters with CA" << std::endl;
50  }
51  std::vector<HGCDoublet::HGCntuplet> foundNtuplets;
52  std::vector<int> seedIndices;
53  std::vector<uint8_t> layer_cluster_usage(input.layerClusters.size(), 0);
54  theGraph_->makeAndConnectDoublets(input.tiles,
55  input.regions,
58  input.layerClusters,
59  input.mask,
60  input.layerClustersTime,
61  1,
62  1,
68 
69  theGraph_->findNtuplets(foundNtuplets, seedIndices, min_clusters_per_ntuplet_, out_in_dfs_, max_out_in_hops_);
70  //#ifdef FP_DEBUG
71  const auto &doublets = theGraph_->getAllDoublets();
72  int tracksterId = 0;
73  for (auto const &ntuplet : foundNtuplets) {
74  std::set<unsigned int> effective_cluster_idx;
75  for (auto const &doublet : ntuplet) {
76  auto innerCluster = doublets[doublet].innerClusterId();
77  auto outerCluster = doublets[doublet].outerClusterId();
78  effective_cluster_idx.insert(innerCluster);
79  effective_cluster_idx.insert(outerCluster);
80  if (algo_verbosity_ > Advanced) {
81  LogDebug("HGCPatterRecoByCA") << "New doublet " << doublet << " for trackster: " << result.size() << " InnerCl "
82  << innerCluster << " " << input.layerClusters[innerCluster].x() << " "
83  << input.layerClusters[innerCluster].y() << " "
84  << input.layerClusters[innerCluster].z() << " OuterCl " << outerCluster << " "
85  << input.layerClusters[outerCluster].x() << " "
86  << input.layerClusters[outerCluster].y() << " "
87  << input.layerClusters[outerCluster].z() << " " << tracksterId << std::endl;
88  }
89  }
90  for (auto const i : effective_cluster_idx) {
91  layer_cluster_usage[i]++;
92  LogDebug("HGCPatterRecoByCA") << "LayerID: " << i << " count: " << (int)layer_cluster_usage[i] << std::endl;
93  }
94  // Put back indices, in the form of a Trackster, into the results vector
95  Trackster tmp;
96  tmp.vertices.reserve(effective_cluster_idx.size());
97  tmp.vertex_multiplicity.resize(effective_cluster_idx.size(), 0);
98  //regions and seedIndices can have different size
99  //if a seeding region does not lead to any trackster
100  tmp.seedID = input.regions[0].collectionID;
101  tmp.seedIndex = seedIndices[tracksterId];
102  std::copy(std::begin(effective_cluster_idx), std::end(effective_cluster_idx), std::back_inserter(tmp.vertices));
103  result.push_back(tmp);
104  tracksterId++;
105  }
106 
107  for (auto &trackster : result) {
108  assert(trackster.vertices.size() <= trackster.vertex_multiplicity.size());
109  for (size_t i = 0; i < trackster.vertices.size(); ++i) {
110  trackster.vertex_multiplicity[i] = layer_cluster_usage[trackster.vertices[i]];
111  LogDebug("HGCPatterRecoByCA") << "LayerID: " << trackster.vertices[i]
112  << " count: " << (int)trackster.vertex_multiplicity[i] << std::endl;
113  }
114  }
115 
116  // run energy regression and ID
117  energyRegressionAndID(input.layerClusters, result);
118 }
119 
120 void PatternRecognitionbyCA::energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
121  std::vector<Trackster> &tracksters) {
122  // Energy regression and particle identification strategy:
123  //
124  // 1. Set default values for regressed energy and particle id for each trackster.
125  // 2. Store indices of tracksters whose total sum of cluster energies is above the
126  // eidMinClusterEnergy_ (GeV) treshold. Inference is not applied for soft tracksters.
127  // 3. When no trackster passes the selection, return.
128  // 4. Create input and output tensors. The batch dimension is determined by the number of
129  // selected tracksters.
130  // 5. Fill input tensors with layer cluster features. Per layer, clusters are ordered descending
131  // by energy. Given that tensor data is contiguous in memory, we can use pointer arithmetic to
132  // fill values, even with batching.
133  // 6. Zero-fill features for empty clusters in each layer.
134  // 7. Batched inference.
135  // 8. Assign the regressed energy and id probabilities to each trackster.
136  //
137  // Indices used throughout this method:
138  // i -> batch element / trackster
139  // j -> layer
140  // k -> cluster
141  // l -> feature
142 
143  // set default values per trackster, determine if the cluster energy threshold is passed,
144  // and store indices of hard tracksters
145  std::vector<int> tracksterIndices;
146  for (int i = 0; i < (int)tracksters.size(); i++) {
147  // set default values (1)
148  tracksters[i].regressed_energy = 0.;
149  for (float &p : tracksters[i].id_probabilities) {
150  p = 0.;
151  }
152 
153  // calculate the cluster energy sum (2)
154  // note: after the loop, sumClusterEnergy might be just above the threshold which is enough to
155  // decide whether to run inference for the trackster or not
156  float sumClusterEnergy = 0.;
157  for (const unsigned int &vertex : tracksters[i].vertices) {
158  sumClusterEnergy += (float)layerClusters[vertex].energy();
159  // there might be many clusters, so try to stop early
160  if (sumClusterEnergy >= eidMinClusterEnergy_) {
161  tracksterIndices.push_back(i);
162  break;
163  }
164  }
165  }
166 
167  // do nothing when no trackster passes the selection (3)
168  int batchSize = (int)tracksterIndices.size();
169  if (batchSize == 0) {
170  return;
171  }
172 
173  // create input and output tensors (4)
174  tensorflow::TensorShape shape({batchSize, eidNLayers_, eidNClusters_, eidNFeatures_});
175  tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
176  tensorflow::NamedTensorList inputList = {{eidInputName_, input}};
177 
178  std::vector<tensorflow::Tensor> outputs;
179  std::vector<std::string> outputNames;
180  if (!eidOutputNameEnergy_.empty()) {
181  outputNames.push_back(eidOutputNameEnergy_);
182  }
183  if (!eidOutputNameId_.empty()) {
184  outputNames.push_back(eidOutputNameId_);
185  }
186 
187  // fill input tensor (5)
188  for (int i = 0; i < batchSize; i++) {
189  const Trackster &trackster = tracksters[tracksterIndices[i]];
190 
191  // per layer, we only consider the first eidNClusters_ clusters in terms of energy, so in order
192  // to avoid creating large / nested structures to do the sorting for an unknown number of total
193  // clusters, create a sorted list of layer cluster indices to keep track of the filled clusters
194  std::vector<int> clusterIndices(trackster.vertices.size());
195  for (int k = 0; k < (int)trackster.vertices.size(); k++) {
196  clusterIndices[k] = k;
197  }
198  sort(clusterIndices.begin(), clusterIndices.end(), [&layerClusters, &trackster](const int &a, const int &b) {
199  return layerClusters[trackster.vertices[a]].energy() > layerClusters[trackster.vertices[b]].energy();
200  });
201 
202  // keep track of the number of seen clusters per layer
203  std::vector<int> seenClusters(eidNLayers_);
204 
205  // loop through clusters by descending energy
206  for (const int &k : clusterIndices) {
207  // get features per layer and cluster and store the values directly in the input tensor
208  const reco::CaloCluster &cluster = layerClusters[trackster.vertices[k]];
209  int j = rhtools_.getLayerWithOffset(cluster.hitsAndFractions()[0].first) - 1;
210  if (j < eidNLayers_ && seenClusters[j] < eidNClusters_) {
211  // get the pointer to the first feature value for the current batch, layer and cluster
212  float *features = &input.tensor<float, 4>()(i, j, seenClusters[j], 0);
213 
214  // fill features
215  *(features++) = float(cluster.eta());
216  *(features++) = float(cluster.phi());
217  *features = float(cluster.energy());
218 
219  // increment seen clusters
220  seenClusters[j]++;
221  }
222  }
223 
224  // zero-fill features of empty clusters in each layer (6)
225  for (int j = 0; j < eidNLayers_; j++) {
226  for (int k = seenClusters[j]; k < eidNClusters_; k++) {
227  float *features = &input.tensor<float, 4>()(i, j, k, 0);
228  for (int l = 0; l < eidNFeatures_; l++) {
229  *(features++) = 0.f;
230  }
231  }
232  }
233  }
234 
235  // run the inference (7)
236  tensorflow::run(eidSession_, inputList, outputNames, &outputs);
237 
238  // store regressed energy per trackster (8)
239  if (!eidOutputNameEnergy_.empty()) {
240  // get the pointer to the energy tensor, dimension is batch x 1
241  float *energy = outputs[0].flat<float>().data();
242 
243  for (const int &i : tracksterIndices) {
244  tracksters[i].regressed_energy = *(energy++);
245  }
246  }
247 
248  // store id probabilities per trackster (8)
249  if (!eidOutputNameId_.empty()) {
250  // get the pointer to the id probability tensor, dimension is batch x id_probabilities.size()
251  int probsIdx = eidOutputNameEnergy_.empty() ? 0 : 1;
252  float *probs = outputs[probsIdx].flat<float>().data();
253 
254  for (const int &i : tracksterIndices) {
255  for (float &p : tracksters[i].id_probabilities) {
256  p = *(probs++);
257  }
258  }
259  }
260 }
#define LogDebug(id)
Session * createSession(SessionOptions &sessionOptions)
Definition: TensorFlow.cc:71
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:25
std::vector< unsigned int > vertices
Definition: Trackster.h:19
constexpr int nPhiBins
Definition: Common.h:12
#define nullptr
constexpr int nEtaBins
Definition: Common.h:11
double eta() const
pseudorapidity of cluster centroid
Definition: CaloCluster.h:180
static std::string const input
Definition: EdmProvDump.cc:48
void getEventSetup(const edm::EventSetup &)
Definition: RecHitTools.cc:70
const edm::ValueMap< float > & layerClustersTime
PatternRecognitionbyCA(const edm::ParameterSet &conf, const CacheBase *cache)
const std::vector< reco::CaloCluster > & layerClusters
const std::vector< std::pair< DetId, float > > & hitsAndFractions() const
Definition: CaloCluster.h:209
void energyRegressionAndID(const std::vector< reco::CaloCluster > &layerClusters, std::vector< Trackster > &result)
void makeTracksters(const PatternRecognitionAlgoBase::Inputs &input, std::vector< Trackster > &result) override
const std::unique_ptr< HGCGraph > theGraph_
std::atomic< tensorflow::GraphDef * > eidGraphDef
Definition: GlobalCache.h:25
double f[11][100]
double energy() const
cluster energy
Definition: CaloCluster.h:148
#define end
Definition: vmac.h:39
const std::vector< TICLSeedingRegion > & regions
unsigned int getLayerWithOffset(const DetId &) const
Definition: RecHitTools.cc:355
double b
Definition: hdecay.h:118
#define begin
Definition: vmac.h:32
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
double a
Definition: hdecay.h:119
def cache(function)
Definition: utilities.py:3
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, const std::vector< std::string > &targetNodes, std::vector< Tensor > *outputs)
Definition: TensorFlow.cc:176
double phi() const
azimuthal angle of cluster centroid
Definition: CaloCluster.h:183
tmp
align.sh
Definition: createJobs.py:716
unsigned int lastLayerFH() const
Definition: RecHitTools.h:63