CMS 3D CMS Logo

PatternRecognitionbyFastJet.cc
Go to the documentation of this file.
1 // Author: Marco Rovere - marco.rovere@cern.ch
2 // Date: 10/2021
3 #include <algorithm>
4 #include <set>
5 #include <vector>
6 
7 #include "tbb/task_arena.h"
8 #include "tbb/tbb.h"
9 
17 
18 #include "TrackstersPCA.h"
22 
23 #include "fastjet/ClusterSequence.hh"
24 
25 using namespace ticl;
26 using namespace fastjet;
27 
// Constructor of PatternRecognitionbyFastJet<TILES>: forwards (conf, iC) to the
// PatternRecognitionAlgoBaseT<TILES> base class and caches the configuration values below.
// NOTE(review): the declarator line(s) that belong between the template header and the
// initializer list (listing lines 29-30) are missing from this extract; restore them from
// the original file before compiling.
28 template <typename TILES>
31  : PatternRecognitionAlgoBaseT<TILES>(conf, iC),
// Registers the event-setup dependency on CaloGeometry (record CaloGeometryRecord); the
// token is read back with es.getData(caloGeomToken_) in the trackster-building code below.
32  caloGeomToken_(iC.esConsumes<CaloGeometry, CaloGeometryRecord>()),
// Clustering radius handed to the anti-kt JetDefinition.
33  antikt_radius_(conf.getParameter<double>("antikt_radius")),
// Jets must have strictly more constituents than this value to become a trackster.
34  minNumLayerCluster_(conf.getParameter<int>("minNumLayerCluster")),
// Names of the input/output tensors used by the energy regression and ID inference.
35  eidInputName_(conf.getParameter<std::string>("eid_input_name")),
36  eidOutputNameEnergy_(conf.getParameter<std::string>("eid_output_name_energy")),
37  eidOutputNameId_(conf.getParameter<std::string>("eid_output_name_id")),
// Minimum summed layer-cluster energy for a trackster to be sent to inference.
38  eidMinClusterEnergy_(conf.getParameter<double>("eid_min_cluster_energy")),
// Second and third dimensions of the inference input tensor (layers, clusters per layer).
39  eidNLayers_(conf.getParameter<int>("eid_n_layers")),
40  eidNClusters_(conf.getParameter<int>("eid_n_clusters")),
// Forwarded unchanged as the last argument of the PCA assignment call.
// The trailing ';' after the empty body is redundant.
41  computeLocalTime_(conf.getParameter<bool>("computeLocalTime")){};
42 
// Clusters the pseudo-jets in fjInputs with the anti-kt algorithm (radius antikt_radius_)
// and appends one Trackster to `result` for every jet that has strictly more than
// minNumLayerCluster_ constituents. fjInputs is emptied before returning.
//
// fjInputs: pseudo-jets whose user_index() is stored as the trackster vertex index.
// result:   output collection; existing entries are kept and new ones are appended.
//
// NOTE(review): this extract is missing listing lines 46, 52, 67 and 74. The closing
// braces on listing lines 49, 54, 70 and 78 have no visible opener, so each log statement
// is presumably wrapped in a verbosity `if` — confirm against the original file.
// NOTE(review): the log category is spelled "PatternRecogntionbyFastJet" (missing 'i')
// throughout; left untouched here because it is a runtime string.
43 template <typename TILES>
44 void PatternRecognitionbyFastJet<TILES>::buildJetAndTracksters(std::vector<PseudoJet> &fjInputs,
45  std::vector<ticl::Trackster> &result) {
47  edm::LogVerbatim("PatternRecogntionbyFastJet")
48  << "Creating FastJet with " << fjInputs.size() << " LayerClusters in input";
49  }
50  fastjet::ClusterSequence sequence(fjInputs, JetDefinition(antikt_algorithm, antikt_radius_));
// inclusive_jets is called with a threshold of 0; the jets are then ordered by pt.
51  auto jets = fastjet::sorted_by_pt(sequence.inclusive_jets(0));
53  edm::LogVerbatim("PatternRecogntionbyFastJet") << "FastJet produced " << jets.size() << " jets/trackster";
54  }
55 
// New tracksters are written starting at the current end of `result`.
56  auto trackster_idx = result.size();
// Count the jets that pass the size cut so `result` can be grown once, up front.
// The predicate must stay identical to the `if` in the loop below: trackster_idx is only
// advanced for passing jets and would otherwise run out of step with the resized vector.
// Note the lambda takes each PseudoJet by value.
57  auto jetsSize = std::count_if(jets.begin(), jets.end(), [this](fastjet::PseudoJet jet) {
58  return jet.constituents().size() > static_cast<unsigned int>(minNumLayerCluster_);
59  });
60  result.resize(trackster_idx + jetsSize);
61 
62  for (const auto &pj : jets) {
63  if (pj.constituents().size() > static_cast<unsigned int>(minNumLayerCluster_)) {
64  for (const auto &component : pj.constituents()) {
// user_index() carries whatever index the caller attached with set_user_index().
65  result[trackster_idx].vertices().push_back(component.user_index());
// Every constituent is stored with multiplicity 1.
66  result[trackster_idx].vertex_multiplicity().push_back(1);
// NOTE(review): judging by the brace nesting, this message sits inside the constituent
// loop and would be emitted once per constituent rather than once per jet — confirm.
68  edm::LogVerbatim("PatternRecogntionbyFastJet")
69  << "Jet has " << pj.constituents().size() << " components that are stored in trackster " << trackster_idx;
70  }
71  }
72  trackster_idx++;
73  } else {
75  edm::LogVerbatim("PatternRecogntionbyFastJet")
76  << "Jet with " << pj.constituents().size() << " constituents discarded since too small wrt "
77  << minNumLayerCluster_;
78  }
79  }
80  }
// Leave the input buffer empty so the caller can refill it for the next clustering pass.
81  fjInputs.clear();
82 }
83 
// Trackster building entry point: walks the layer tiles, turns every unmasked layer
// cluster into a FastJet pseudo-jet, clusters each detector side separately via
// buildJetAndTracksters, then runs the PCA assignment and the energy regression / ID.
//
// NOTE(review): this extract is missing listing lines 85-86 (the function declarator and
// its first parameter, referred to as `input` below), 97-99 (where isHFnose, nEtaBin and
// nPhiBin are presumably defined), 113/117/124/154 (presumably verbosity `if` openers) and
// 145 (the head of the call whose arguments are on listing lines 146-150).
// NOTE(review): seedToTracksterAssociation is not touched anywhere in the visible body.
84 template <typename TILES>
87  std::vector<Trackster> &result,
88  std::unordered_map<int, std::vector<int>> &seedToTracksterAssociation) {
89  // Protect from events with no seeding regions
90  if (input.regions.empty())
91  return;
92 
93  edm::EventSetup const &es = input.es;
94  const CaloGeometry &geom = es.getData(caloGeomToken_);
95  rhtools_.setGeometry(geom);
96 
100 
101  // We need to partition the two sides of the HGCAL detector
// Layers [0, lastLayerPerSide) are clustered first; reaching index lastLayerPerSide
// triggers the flush of the first batch, and the rest is flushed after the loop.
// NOTE(review): both the side boundary and maxLayer derive from the `- 1` applied here;
// confirm it matches the layer numbering of input.tiles so no layer is dropped or
// assigned to the wrong side.
102  auto lastLayerPerSide = static_cast<unsigned int>(rhtools_.lastLayer(isHFnose)) - 1;
103  unsigned int maxLayer = 2 * lastLayerPerSide - 1;
104  std::vector<fastjet::PseudoJet> fjInputs;
// Clearing a freshly constructed vector has no effect.
105  fjInputs.clear();
106  for (unsigned int currentLayer = 0; currentLayer <= maxLayer; ++currentLayer) {
// First layer of the second batch: cluster what has been collected so far
// (buildJetAndTracksters also empties fjInputs).
107  if (currentLayer == lastLayerPerSide) {
108  buildJetAndTracksters(fjInputs, result);
109  }
110  const auto &tileOnLayer = input.tiles[currentLayer];
// NOTE(review): both bin loops use inclusive upper bounds while the flat tile index is
// ieta * nPhiBin + iphi. If nEtaBin / nPhiBin are bin *counts* (their definitions are not
// visible here), iphi == nPhiBin addresses the same tile as (ieta + 1, 0), so clusters
// would be visited twice, and the final iterations index past nEtaBin * nPhiBin.
// Confirm against the tile container's size.
111  for (int ieta = 0; ieta <= nEtaBin; ++ieta) {
112  auto offset = ieta * nPhiBin;
114  edm::LogVerbatim("PatternRecogntionbyFastJet") << "offset: " << offset;
115  }
116  for (int iphi = 0; iphi <= nPhiBin; ++iphi) {
118  edm::LogVerbatim("PatternRecogntionbyFastJet") << "iphi: " << iphi;
119  edm::LogVerbatim("PatternRecogntionbyFastJet") << "Entries in tileBin: " << tileOnLayer[offset + iphi].size();
120  }
121  for (auto clusterIdx : tileOnLayer[offset + iphi]) {
122  // Skip masked layer clusters
// A mask value of exactly 0 marks the cluster as masked.
123  if (input.mask[clusterIdx] == 0.) {
125  edm::LogVerbatim("PatternRecogntionbyFastJet") << "Skipping masked layerIdx " << clusterIdx;
126  }
127  continue;
128  }
129  // Should we correct for the position of the PV?
130  auto const &cl = input.layerClusters[clusterIdx];
// Build a four-vector whose momentum points from the coordinate origin to the cluster
// position and whose magnitude equals the cluster energy (so |p| == E).
131  math::XYZVector direction(cl.x(), cl.y(), cl.z());
132  direction = direction.Unit();
133  direction *= cl.energy();
134  auto fpj = fastjet::PseudoJet(direction.X(), direction.Y(), direction.Z(), cl.energy());
// Remember the layer-cluster index; buildJetAndTracksters stores it as the trackster vertex.
135  fpj.set_user_index(clusterIdx);
136  fjInputs.push_back(fpj);
137  } // End of loop on the clusters on currentLayer
138  } // End of loop over phi-bin region
139  } // End of loop over eta-bin region
140  } // End of loop over layers
141 
142  // Collect the jet from the other side wrt to the one taken care of inside the main loop above.
143  buildJetAndTracksters(fjInputs, result);
144 
// NOTE(review): the callee name and first argument (listing line 145) are missing; the
// remaining arguments line up with assignPCAtoTracksters(tracksters, layerClusters,
// layerClustersTime, z_limit_em, rhTools, computeLocalTime, ...) — confirm.
146  input.layerClusters,
147  input.layerClustersTime,
148  rhtools_.getPositionLayer(rhtools_.lastLayerEE(isHFnose), isHFnose).z(),
149  rhtools_,
150  computeLocalTime_);
151 
152  // run energy regression and ID
153  energyRegressionAndID(input.layerClusters, input.tfSession, result);
// Dump of the per-trackster results.
155  for (auto const &t : result) {
156  edm::LogVerbatim("PatternRecogntionbyFastJet") << "Barycenter: " << t.barycenter();
157  edm::LogVerbatim("PatternRecogntionbyFastJet") << "LCs: " << t.vertices().size();
158  edm::LogVerbatim("PatternRecogntionbyFastJet") << "Energy: " << t.raw_energy();
159  edm::LogVerbatim("PatternRecogntionbyFastJet") << "Regressed: " << t.regressed_energy();
160  }
161  }
162 }
163 
// Batched energy regression and particle-ID inference for the given tracksters, using the
// layer clusters they reference as input features. Results are written back into
// `tracksters`; tracksters below the energy threshold are not modified here.
//
// NOTE(review): this extract is missing listing line 165 (the function declarator and its
// first parameter, referred to as `layerClusters` below) and listing line 279 (the
// statement that actually runs the inference and fills `outputs`).
164 template <typename TILES>
166  const tensorflow::Session *eidSession,
167  std::vector<Trackster> &tracksters) {
168  // Energy regression and particle identification strategy:
169  //
170  // 1. Set default values for regressed energy and particle id for each trackster.
171  // 2. Store indices of tracksters whose total sum of cluster energies is above the
172  // eidMinClusterEnergy_ (GeV) threshold. Inference is not applied for soft tracksters.
173  // 3. When no trackster passes the selection, return.
174  // 4. Create input and output tensors. The batch dimension is determined by the number of
175  // selected tracksters.
176  // 5. Fill input tensors with layer cluster features. Per layer, clusters are ordered descending
177  // by energy. Given that tensor data is contiguous in memory, we can use pointer arithmetic to
178  // fill values, even with batching.
179  // 6. Zero-fill features for empty clusters in each layer.
180  // 7. Batched inference.
181  // 8. Assign the regressed energy and id probabilities to each trackster.
182  //
183  // Indices used throughout this method:
184  // i -> batch element / trackster
185  // j -> layer
186  // k -> cluster
187  // l -> feature
188 
189  // set default values per trackster, determine if the cluster energy threshold is passed,
190  // and store indices of hard tracksters
191  std::vector<int> tracksterIndices;
192  for (int i = 0; i < static_cast<int>(tracksters.size()); i++) {
193  // calculate the cluster energy sum (2)
194  // note: after the loop, sumClusterEnergy might be just above the threshold which is enough to
195  // decide whether to run inference for the trackster or not
196  float sumClusterEnergy = 0.;
197  for (const unsigned int &vertex : tracksters[i].vertices()) {
198  sumClusterEnergy += static_cast<float>(layerClusters[vertex].energy());
199  // there might be many clusters, so try to stop early
200  if (sumClusterEnergy >= eidMinClusterEnergy_) {
// NOTE(review): despite step (1) above, the defaults are applied only to tracksters
// that reach the threshold; softer tracksters keep whatever values they already carry.
201  // set default values (1)
202  tracksters[i].setRegressedEnergy(0.f);
203  tracksters[i].zeroProbabilities();
204  tracksterIndices.push_back(i);
205  break;
206  }
207  }
208  }
209 
210  // do nothing when no trackster passes the selection (3)
211  int batchSize = static_cast<int>(tracksterIndices.size());
212  if (batchSize == 0) {
213  return;
214  }
215 
216  // create input and output tensors (4)
217  tensorflow::TensorShape shape({batchSize, eidNLayers_, eidNClusters_, eidNFeatures_});
218  tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
// NOTE(review): inputList is built before `input` is filled, so this relies on the
// tensor stored in the list sharing its buffer with `input` — confirm against the
// tensorflow::Tensor copy semantics.
219  tensorflow::NamedTensorList inputList = {{eidInputName_, input}};
220 
// Output order: the energy tensor name first (when configured), then the id tensor name.
// The indexing of `outputs` further down depends on this order.
221  std::vector<tensorflow::Tensor> outputs;
222  std::vector<std::string> outputNames;
223  if (!eidOutputNameEnergy_.empty()) {
224  outputNames.push_back(eidOutputNameEnergy_);
225  }
226  if (!eidOutputNameId_.empty()) {
227  outputNames.push_back(eidOutputNameId_);
228  }
229 
230  // fill input tensor (5)
231  for (int i = 0; i < batchSize; i++) {
232  const Trackster &trackster = tracksters[tracksterIndices[i]];
233 
234  // per layer, we only consider the first eidNClusters_ clusters in terms of energy, so in order
235  // to avoid creating large / nested structures to do the sorting for an unknown number of total
236  // clusters, create a sorted list of layer cluster indices to keep track of the filled clusters
// clusterIndices holds positions into trackster.vertices() (0..size-1), not layer-cluster
// ids; the comparator below maps them to layer clusters to order by descending energy.
237  std::vector<int> clusterIndices(trackster.vertices().size());
238  for (int k = 0; k < (int)trackster.vertices().size(); k++) {
239  clusterIndices[k] = k;
240  }
241  sort(clusterIndices.begin(), clusterIndices.end(), [&layerClusters, &trackster](const int &a, const int &b) {
242  return layerClusters[trackster.vertices(a)].energy() > layerClusters[trackster.vertices(b)].energy();
243  });
244 
245  // keep track of the number of seen clusters per layer
246  std::vector<int> seenClusters(eidNLayers_);
247 
248  // loop through clusters by descending energy
249  for (const int &k : clusterIndices) {
250  // get features per layer and cluster and store the values directly in the input tensor
251  const reco::CaloCluster &cluster = layerClusters[trackster.vertices(k)];
// Layer slot derived from the cluster's first hit, shifted down by one.
// NOTE(review): assumes hitsAndFractions() is non-empty; only the upper bound of j is
// checked below, so a value of j below 0 would index seenClusters out of range — confirm
// getLayerWithOffset never returns less than 1.
252  int j = rhtools_.getLayerWithOffset(cluster.hitsAndFractions()[0].first) - 1;
// Clusters on layers beyond eidNLayers_, or beyond the first eidNClusters_ of a layer,
// are skipped.
253  if (j < eidNLayers_ && seenClusters[j] < eidNClusters_) {
254  // get the pointer to the first feature value for the current batch, layer and cluster
255  float *features = &input.tensor<float, 4>()(i, j, seenClusters[j], 0);
256 
// Three values are written per cluster: energy divided by the vertex multiplicity,
// |eta| and phi.
// NOTE(review): eidNFeatures_ is declared outside this extract; if it is larger than 3,
// the remaining feature slots of a filled cluster are not written by this function.
257  // fill features
258  *(features++) = float(cluster.energy() / float(trackster.vertex_multiplicity(k)));
259  *(features++) = float(std::abs(cluster.eta()));
260  *(features) = float(cluster.phi());
261 
262  // increment seen clusters
263  seenClusters[j]++;
264  }
265  }
266 
267  // zero-fill features of empty clusters in each layer (6)
268  for (int j = 0; j < eidNLayers_; j++) {
269  for (int k = seenClusters[j]; k < eidNClusters_; k++) {
270  float *features = &input.tensor<float, 4>()(i, j, k, 0);
271  for (int l = 0; l < eidNFeatures_; l++) {
272  *(features++) = 0.f;
273  }
274  }
275  }
276  }
277 
// NOTE(review): the inference statement itself (listing line 279) is missing from this
// extract; the code below presumes it has filled `outputs` in the order of outputNames.
278  // run the inference (7)
280 
281  // store regressed energy per trackster (8)
282  if (!eidOutputNameEnergy_.empty()) {
283  // get the pointer to the energy tensor, dimension is batch x 1
284  float *energy = outputs[0].flat<float>().data();
285 
// tracksterIndices is walked in batch order, so the i-th value goes to the i-th
// selected trackster.
286  for (const int &i : tracksterIndices) {
287  tracksters[i].setRegressedEnergy(*(energy++));
288  }
289  }
290 
291  // store id probabilities per trackster (8)
292  if (!eidOutputNameId_.empty()) {
293  // get the pointer to the id probability tensor, dimension is batch x id_probabilities.size()
// The id tensor is second in `outputs` only when an energy output was requested as well.
294  int probsIdx = eidOutputNameEnergy_.empty() ? 0 : 1;
295  float *probs = outputs[probsIdx].flat<float>().data();
296 
297  for (const int &i : tracksterIndices) {
298  tracksters[i].setProbabilities(probs);
// Advance to the next trackster's row of probabilities.
299  probs += tracksters[i].id_probabilities().size();
300  }
301  }
302 }
303 
// Declares the configuration parameters of this algorithm together with their default
// values. The keys other than "algo_verbosity" are the ones read by the constructor.
// NOTE(review): the function declarator (listing line 305) is missing from this extract;
// it is presumably the static fillPSetDescription(edm::ParameterSetDescription &iDesc)
// member — confirm against the original file.
304 template <typename TILES>
306  iDesc.add<int>("algo_verbosity", 0);
307  iDesc.add<double>("antikt_radius", 0.09)->setComment("Radius to be used while running the Anti-kt clustering");
// "Not Inclusive": a jet needs strictly more constituents than this value to be kept
// (the selection in buildJetAndTracksters uses `>`).
308  iDesc.add<int>("minNumLayerCluster", 5)->setComment("Not Inclusive");
// Tensor names used by the energy regression / ID inference; an empty output name
// disables the corresponding output.
309  iDesc.add<std::string>("eid_input_name", "input");
310  iDesc.add<std::string>("eid_output_name_energy", "output/regressed_energy");
311  iDesc.add<std::string>("eid_output_name_id", "output/id_probabilities");
// Summed layer-cluster energy a trackster must reach to be sent to inference.
312  iDesc.add<double>("eid_min_cluster_energy", 1.);
// Layer and clusters-per-layer dimensions of the inference input tensor.
313  iDesc.add<int>("eid_n_layers", 50);
314  iDesc.add<int>("eid_n_clusters", 10);
315  iDesc.add<bool>("computeLocalTime", false);
316 }
317 
Log< level::Info, true > LogVerbatim
void setComment(std::string const &value)
ESGetTokenH3DDVariant esConsumes(std::string const &Record, edm::ConsumesCollector &)
Definition: DeDxTools.cc:283
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:31
const std::vector< std::pair< DetId, float > > & hitsAndFractions() const
Definition: CaloCluster.h:210
T const & getData(const ESGetToken< T, R > &iToken) const noexcept(false)
Definition: EventSetup.h:119
void energyRegressionAndID(const std::vector< reco::CaloCluster > &layerClusters, const tensorflow::Session *, std::vector< Trackster > &result)
void assignPCAtoTracksters(std::vector< Trackster > &tracksters, const std::vector< reco::CaloCluster > &layerClusters, const edm::ValueMap< std::pair< float, float >> &layerClustersTime, double z_limit_em, hgcal::RecHitTools const &rhTools, bool computeLocalTime=false, bool energyWeight=true, bool clean=false, int minLayer=10, int maxLayer=10)
double phi() const
azimuthal angle of cluster centroid
Definition: CaloCluster.h:184
static std::string const input
Definition: EdmProvDump.cc:50
PatternRecognitionbyFastJet(const edm::ParameterSet &conf, edm::ConsumesCollector)
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:271
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
double f[11][100]
string inputList
Definition: crabTemplate.py:6
ParameterDescriptionBase * add(U const &iLabel, T const &value)
void makeTracksters(const typename PatternRecognitionAlgoBaseT< TILES >::Inputs &input, std::vector< Trackster > &result, std::unordered_map< int, std::vector< int >> &seedToTracksterAssociation) override
double energy() const
cluster energy
Definition: CaloCluster.h:149
void buildJetAndTracksters(std::vector< fastjet::PseudoJet > &, std::vector< ticl::Trackster > &)
std::vector< unsigned int > & vertices()
Definition: Trackster.h:57
XYZVectorD XYZVector
spatial vector with cartesian internal representation
Definition: Vector3D.h:31
std::vector< float > & vertex_multiplicity()
Definition: Trackster.h:58
double b
Definition: hdecay.h:120
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
static constexpr int nPhiBins
double a
Definition: hdecay.h:121
Definition: Common.h:10
double eta() const
pseudorapidity of cluster centroid
Definition: CaloCluster.h:181