CMS 3D CMS Logo

TracksterLinksProducer.cc
Go to the documentation of this file.
1 // Author: Felice Pantaleo, Wahid Redjeb (CERN) - felice.pantaleo@cern.ch, wahid.redjeb@cern.ch
2 // Date: 12/2023
3 #include <memory> // unique_ptr
16 
18 
23 
29 
33 
36 
39 
43 
45 
46 #include "TrackstersPCA.h"
47 
48 using namespace ticl;
50 
51 class TracksterLinksProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
52 public:
53  explicit TracksterLinksProducer(const edm::ParameterSet &ps, const ONNXRuntime *);
55  void produce(edm::Event &, const edm::EventSetup &) override;
56  static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
57 
58  void beginRun(edm::Run const &iEvent, edm::EventSetup const &es) override;
59  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet &iConfig);
60  static void globalEndJob(const ONNXRuntime *);
61 
62 private:
63  void printTrackstersDebug(const std::vector<Trackster> &, const char *label) const;
64  void dumpTrackster(const Trackster &) const;
65  void energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
66  const tensorflow::Session *,
67  std::vector<Trackster> &result) const;
68 
69  std::unique_ptr<TracksterLinkingAlgoBase> linkingAlgo_;
71 
72  std::vector<edm::EDGetTokenT<std::vector<Trackster>>> tracksters_tokens_;
75 
76  const bool regressionAndPid_;
79  const tensorflow::Session *tfSession_;
83  const float eidMinClusterEnergy_;
84  const int eidNLayers_;
85  const int eidNClusters_;
86  static constexpr int eidNFeatures_ = 3;
87  tensorflow::Session *eidSession_;
88 
89  std::vector<edm::EDGetTokenT<std::vector<float>>> original_masks_tokens_;
90 
94 
100 };
101 
103  : algoType_(ps.getParameter<edm::ParameterSet>("linkingPSet").getParameter<std::string>("type")),
104  clusters_token_(consumes<std::vector<reco::CaloCluster>>(ps.getParameter<edm::InputTag>("layer_clusters"))),
105  clustersTime_token_(
106  consumes<edm::ValueMap<std::pair<float, float>>>(ps.getParameter<edm::InputTag>("layer_clustersTime"))),
107  regressionAndPid_(ps.getParameter<bool>("regressionAndPid")),
108  tfDnnLabel_(ps.getParameter<std::string>("tfDnnLabel")),
109  tfDnnToken_(esConsumes(edm::ESInputTag("", tfDnnLabel_))),
110  tfSession_(nullptr),
111  eidInputName_(ps.getParameter<std::string>("eid_input_name")),
112  eidOutputNameEnergy_(ps.getParameter<std::string>("eid_output_name_energy")),
113  eidOutputNameId_(ps.getParameter<std::string>("eid_output_name_id")),
114  eidMinClusterEnergy_(ps.getParameter<double>("eid_min_cluster_energy")),
115  eidNLayers_(ps.getParameter<int>("eid_n_layers")),
116  eidNClusters_(ps.getParameter<int>("eid_n_clusters")),
117  eidSession_(nullptr),
118  geometry_token_(esConsumes<CaloGeometry, CaloGeometryRecord, edm::Transition::BeginRun>()),
119  detector_(ps.getParameter<std::string>("detector")),
120  propName_(ps.getParameter<std::string>("propagator")),
121  bfield_token_(esConsumes<MagneticField, IdealMagneticFieldRecord, edm::Transition::BeginRun>()),
122  propagator_token_(
123  esConsumes<Propagator, TrackingComponentsRecord, edm::Transition::BeginRun>(edm::ESInputTag("", propName_))) {
124  // Loop over the edm::VInputTag and append the token to tracksters_tokens_
125  for (auto const &tag : ps.getParameter<std::vector<edm::InputTag>>("tracksters_collections")) {
126  tracksters_tokens_.emplace_back(consumes<std::vector<Trackster>>(tag));
127  }
128  //Loop over the edm::VInputTag of masks and append the token to original_masks_tokens_
129  for (auto const &tag : ps.getParameter<std::vector<edm::InputTag>>("original_masks")) {
130  original_masks_tokens_.emplace_back(consumes<std::vector<float>>(tag));
131  }
132 
133  // New trackster collection after linking
134  produces<std::vector<Trackster>>();
135 
136  // Links
137  produces<std::vector<std::vector<unsigned int>>>();
138  produces<std::vector<std::vector<unsigned int>>>("linkedTracksterIdToInputTracksterId");
139  // LayerClusters Mask
140  produces<std::vector<float>>();
141 
142  auto linkingPSet = ps.getParameter<edm::ParameterSet>("linkingPSet");
143 
144  if (algoType_ == "Skeletons") {
145  std::string detectorName_ = (detector_ == "HFNose") ? "HGCalHFNoseSensitive" : "HGCalEESensitive";
146  hdc_token_ = esConsumes<HGCalDDDConstants, IdealGeometryRecord, edm::Transition::BeginRun>(
147  edm::ESInputTag("", detectorName_));
148  }
149 
150  linkingAlgo_ = TracksterLinkingPluginFactory::get()->create(algoType_, linkingPSet, consumesCollector(), onnxRuntime);
151 }
152 
153 std::unique_ptr<ONNXRuntime> TracksterLinksProducer::initializeGlobalCache(const edm::ParameterSet &iConfig) {
154  auto const &pluginPset = iConfig.getParameter<edm::ParameterSet>("linkingPSet");
155  if (pluginPset.exists("onnxModelPath"))
156  return std::make_unique<ONNXRuntime>(pluginPset.getParameter<edm::FileInPath>("onnxModelPath").fullPath());
157  else
158  return std::unique_ptr<ONNXRuntime>(nullptr);
159 }
160 
162 
164  if (algoType_ == "Skeletons") {
166  hgcons_ = hdc.product();
167  }
168 
171 
174 
175  linkingAlgo_->initialize(hgcons_, rhtools_, bfield, propagator);
176 };
177 
179  auto e_over_h = (t.raw_em_pt() / ((t.raw_pt() - t.raw_em_pt()) != 0. ? (t.raw_pt() - t.raw_em_pt()) : 1.));
180  LogDebug("TracksterLinksProducer")
181  << "\nTrackster raw_pt: " << t.raw_pt() << " raw_em_pt: " << t.raw_em_pt() << " eoh: " << e_over_h
182  << " barycenter: " << t.barycenter() << " eta,phi (baricenter): " << t.barycenter().eta() << ", "
183  << t.barycenter().phi() << " eta,phi (eigen): " << t.eigenvectors(0).eta() << ", " << t.eigenvectors(0).phi()
184  << " pt(eigen): " << std::sqrt(t.eigenvectors(0).Unit().perp2()) * t.raw_energy() << " seedID: " << t.seedID()
185  << " seedIndex: " << t.seedIndex() << " size: " << t.vertices().size() << " average usage: "
186  << (std::accumulate(std::begin(t.vertex_multiplicity()), std::end(t.vertex_multiplicity()), 0.) /
187  (float)t.vertex_multiplicity().size())
188  << " raw_energy: " << t.raw_energy() << " regressed energy: " << t.regressed_energy()
189  << " probs(ga/e/mu/np/cp/nh/am/unk): ";
190  for (auto const &p : t.id_probabilities()) {
191  LogDebug("TracksterLinksProducer") << std::fixed << p << " ";
192  }
193  LogDebug("TracksterLinksProducer") << " sigmas: ";
194  for (auto const &s : t.sigmas()) {
195  LogDebug("TracksterLinksProducer") << s << " ";
196  }
197  LogDebug("TracksterLinksProducer") << std::endl;
198 }
199 
200 void TracksterLinksProducer::energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
201  const tensorflow::Session *eidSession,
202  std::vector<Trackster> &tracksters) const {
203  // Energy regression and particle identification strategy:
204  //
205  // 1. Set default values for regressed energy and particle id for each trackster.
206  // 2. Store indices of tracksters whose total sum of cluster energies is above the
207  // eidMinClusterEnergy_ (GeV) threshold. Inference is not applied for soft tracksters.
208  // 3. When no trackster passes the selection, return.
209  // 4. Create input and output tensors. The batch dimension is determined by the number of
210  // selected tracksters.
211  // 5. Fill input tensors with layer cluster features. Per layer, clusters are ordered descending
212  // by energy. Given that tensor data is contiguous in memory, we can use pointer arithmetic to
213  // fill values, even with batching.
214  // 6. Zero-fill features for empty clusters in each layer.
215  // 7. Batched inference.
216  // 8. Assign the regressed energy and id probabilities to each trackster.
217  //
218  // Indices used throughout this method:
219  // i -> batch element / trackster
220  // j -> layer
221  // k -> cluster
222  // l -> feature
223 
224  // do nothing when no trackster passes the selection (3)
225  int batchSize = (int)tracksters.size();
226  if (batchSize == 0) {
227  return;
228  }
229 
230  for (auto &t : tracksters) {
231  t.setRegressedEnergy(0.f);
232  t.zeroProbabilities();
233  }
234 
235  // create input and output tensors (4)
236  tensorflow::TensorShape shape({batchSize, eidNLayers_, eidNClusters_, eidNFeatures_});
237  tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
239  static constexpr int inputDimension = 4;
240 
241  std::vector<tensorflow::Tensor> outputs;
242  std::vector<std::string> outputNames;
243  if (!eidOutputNameEnergy_.empty()) {
245  }
246  if (!eidOutputNameId_.empty()) {
247  outputNames.push_back(eidOutputNameId_);
248  }
249 
250  // fill input tensor (5)
251  for (int i = 0; i < batchSize; i++) {
252  const Trackster &trackster = tracksters[i];
253 
254  // per layer, we only consider the first eidNClusters_ clusters in terms of
255  // energy, so in order to avoid creating large / nested structures to do
256  // the sorting for an unknown number of total clusters, create a sorted
257  // list of layer cluster indices to keep track of the filled clusters
258  std::vector<int> clusterIndices(trackster.vertices().size());
259  for (int k = 0; k < (int)trackster.vertices().size(); k++) {
260  clusterIndices[k] = k;
261  }
262  sort(clusterIndices.begin(), clusterIndices.end(), [&layerClusters, &trackster](const int &a, const int &b) {
263  return layerClusters[trackster.vertices(a)].energy() > layerClusters[trackster.vertices(b)].energy();
264  });
265 
266  // keep track of the number of seen clusters per layer
267  std::vector<int> seenClusters(eidNLayers_);
268 
269  // loop through clusters by descending energy
270  for (const int &k : clusterIndices) {
271  // get features per layer and cluster and store the values directly in the input tensor
272  const reco::CaloCluster &cluster = layerClusters[trackster.vertices(k)];
273  int j = rhtools_.getLayerWithOffset(cluster.hitsAndFractions()[0].first) - 1;
274  if (j < eidNLayers_ && seenClusters[j] < eidNClusters_) {
275  // get the pointer to the first feature value for the current batch, layer and cluster
276  float *features = &input.tensor<float, inputDimension>()(i, j, seenClusters[j], 0);
277 
278  // fill features
279  *(features++) = float(cluster.energy() / float(trackster.vertex_multiplicity(k)));
280  *(features++) = float(std::abs(cluster.eta()));
281  *(features) = float(cluster.phi());
282 
283  // increment seen clusters
284  seenClusters[j]++;
285  }
286  }
287 
288  // zero-fill features of empty clusters in each layer (6)
289  for (int j = 0; j < eidNLayers_; j++) {
290  for (int k = seenClusters[j]; k < eidNClusters_; k++) {
291  float *features = &input.tensor<float, inputDimension>()(i, j, k, 0);
292  for (int l = 0; l < eidNFeatures_; l++) {
293  *(features++) = 0.f;
294  }
295  }
296  }
297  }
298 
299  // run the inference (7)
301 
302  // store regressed energy per trackster (8)
303  if (!eidOutputNameEnergy_.empty()) {
304  // get the pointer to the energy tensor, dimension is batch x 1
305  float *energy = outputs[0].flat<float>().data();
306 
307  for (int i = 0; i < batchSize; ++i) {
308  float regressedEnergy =
309  tracksters[i].raw_energy() > eidMinClusterEnergy_ ? energy[i] : tracksters[i].raw_energy();
310  tracksters[i].setRegressedEnergy(regressedEnergy);
311  }
312  }
313 
314  // store id probabilities per trackster (8)
315  if (!eidOutputNameId_.empty()) {
316  // get the pointer to the id probability tensor, dimension is batch x id_probabilities.size()
317  int probsIdx = !eidOutputNameEnergy_.empty();
318  float *probs = outputs[probsIdx].flat<float>().data();
319  int probsNumber = tracksters[0].id_probabilities().size();
320  for (int i = 0; i < batchSize; ++i) {
321  tracksters[i].setProbabilities(&probs[i * probsNumber]);
322  }
323  }
324 }
325 
327  linkingAlgo_->setEvent(evt, es);
328 
329  auto resultTracksters = std::make_unique<std::vector<Trackster>>();
330 
331  auto linkedResultTracksters = std::make_unique<std::vector<std::vector<unsigned int>>>();
332 
333  const auto &layerClusters = evt.get(clusters_token_);
334  const auto &layerClustersTimes = evt.get(clustersTime_token_);
335 
336  tfSession_ = es.getData(tfDnnToken_).getSession();
337  // loop over the original_masks_tokens_ and get the original masks collections and multiply them
338  // to get the global mask
339  std::vector<float> original_global_mask(layerClusters.size(), 1.f);
340  for (unsigned int i = 0; i < original_masks_tokens_.size(); ++i) {
341  const auto &tmp_mask = evt.get(original_masks_tokens_[i]);
342  for (unsigned int j = 0; j < tmp_mask.size(); ++j) {
343  original_global_mask[j] *= tmp_mask[j];
344  }
345  }
346 
347  auto resultMask = std::make_unique<std::vector<float>>(original_global_mask);
348 
349  std::vector<edm::Handle<std::vector<Trackster>>> tracksters_h(tracksters_tokens_.size());
350  MultiVectorManager<Trackster> trackstersManager;
351  for (unsigned int i = 0; i < tracksters_tokens_.size(); ++i) {
352  evt.getByToken(tracksters_tokens_[i], tracksters_h[i]);
353  //Fill MultiVectorManager
354  trackstersManager.addVector(*tracksters_h[i]);
355  }
356 
357  // Linking
358  const typename TracksterLinkingAlgoBase::Inputs input(evt, es, layerClusters, layerClustersTimes, trackstersManager);
359  auto linkedTracksterIdToInputTracksterId = std::make_unique<std::vector<std::vector<unsigned int>>>();
360 
361  // LinkTracksters will produce a vector of vector of indices of tracksters that:
362  // 1) are linked together if more than one
363  // 2) are isolated if only one
364  // Result tracksters contains the final version of the trackster collection
365  // linkedTrackstersToInputTrackstersMap contains the mapping between the linked tracksters and the input tracksters
366  linkingAlgo_->linkTracksters(input, *resultTracksters, *linkedResultTracksters, *linkedTracksterIdToInputTracksterId);
367 
368  // Now we need to remove the tracksters that are not linked
369  // We need to emplace_back in the resultTracksters only the tracksters that are linked
370 
371  for (auto const &resultTrackster : *resultTracksters) {
372  for (auto const &clusterIndex : resultTrackster.vertices()) {
373  (*resultMask)[clusterIndex] = 0.f;
374  }
375  }
376 
377  if (regressionAndPid_)
378  energyRegressionAndID(layerClusters, tfSession_, *resultTracksters);
379 
380  assignPCAtoTracksters(*resultTracksters,
382  layerClustersTimes,
384  rhtools_,
385  true);
386 
387  evt.put(std::move(linkedResultTracksters));
388  evt.put(std::move(resultMask));
389  evt.put(std::move(resultTracksters));
390  evt.put(std::move(linkedTracksterIdToInputTracksterId), "linkedTracksterIdToInputTracksterId");
391 }
392 
393 void TracksterLinksProducer::printTrackstersDebug(const std::vector<Trackster> &tracksters, const char *label) const {
394  int counter = 0;
395  LogDebug("TracksterLinksProducer").log([&](auto &log) {
396  for (auto const &t : tracksters) {
397  log << counter++ << " TracksterLinksProducer (" << label << ") obj barycenter: " << t.barycenter()
398  << " eta,phi (baricenter): " << t.barycenter().eta() << ", " << t.barycenter().phi()
399  << " eta,phi (eigen): " << t.eigenvectors(0).eta() << ", " << t.eigenvectors(0).phi()
400  << " pt(eigen): " << std::sqrt(t.eigenvectors(0).Unit().perp2()) * t.raw_energy() << " seedID: " << t.seedID()
401  << " seedIndex: " << t.seedIndex() << " size: " << t.vertices().size() << " average usage: "
402  << (std::accumulate(std::begin(t.vertex_multiplicity()), std::end(t.vertex_multiplicity()), 0.) /
403  (float)t.vertex_multiplicity().size())
404  << " raw_energy: " << t.raw_energy() << " regressed energy: " << t.regressed_energy()
405  << " probs(ga/e/mu/np/cp/nh/am/unk): ";
406  for (auto const &p : t.id_probabilities()) {
407  log << std::fixed << p << " ";
408  }
409  log << " sigmas: ";
410  for (auto const &s : t.sigmas()) {
411  log << s << " ";
412  }
413  log << "\n";
414  }
415  });
416 }
417 
420  edm::ParameterSetDescription linkingDesc;
421  linkingDesc.addNode(edm::PluginDescription<TracksterLinkingPluginFactory>("type", "Skeletons", true));
422 
423  desc.add<edm::ParameterSetDescription>("linkingPSet", linkingDesc);
424  desc.add<std::vector<edm::InputTag>>("tracksters_collections", {edm::InputTag("ticlTrackstersCLUE3DHigh")});
425  desc.add<std::vector<edm::InputTag>>("original_masks",
426  {edm::InputTag("hgcalMergeLayerClusters", "InitialLayerClustersMask")});
427  desc.add<edm::InputTag>("layer_clusters", edm::InputTag("hgcalMergeLayerClusters"));
428  desc.add<edm::InputTag>("layer_clustersTime", edm::InputTag("hgcalMergeLayerClusters", "timeLayerCluster"));
429  desc.add<bool>("regressionAndPid", false);
430  desc.add<std::string>("tfDnnLabel", "tracksterSelectionTf");
431  desc.add<std::string>("eid_input_name", "input");
432  desc.add<std::string>("eid_output_name_energy", "output/regressed_energy");
433  desc.add<std::string>("eid_output_name_id", "output/id_probabilities");
434  desc.add<double>("eid_min_cluster_energy", 2.5);
435  desc.add<int>("eid_n_layers", 50);
436  desc.add<int>("eid_n_clusters", 10);
437  desc.add<std::string>("detector", "HGCAL");
438  desc.add<std::string>("propagator", "PropagatorWithMaterial");
439  descriptions.add("tracksterLinksProducer", desc);
440 }
441 
const HGCalDDDConstants * hgcons_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
ESGetTokenH3DDVariant esConsumes(std::string const &Record, edm::ConsumesCollector &)
Definition: DeDxTools.cc:283
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:31
T getParameter(std::string const &) const
Definition: ParameterSet.h:307
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:133
const std::vector< std::pair< DetId, float > > & hitsAndFractions() const
Definition: CaloCluster.h:210
void produce(edm::Event &, const edm::EventSetup &) override
T const & getData(const ESGetToken< T, R > &iToken) const noexcept(false)
Definition: EventSetup.h:119
bool get(ProductID const &oid, Handle< PROD > &result) const
Definition: Event.h:344
tensorflow::Session * eidSession_
std::string fullPath() const
Definition: FileInPath.cc:161
const edm::ESGetToken< MagneticField, IdealMagneticFieldRecord > bfield_token_
std::vector< edm::EDGetTokenT< std::vector< float > > > original_masks_tokens_
const std::string eidOutputNameEnergy_
static void globalEndJob(const ONNXRuntime *)
TracksterLinksProducer(const edm::ParameterSet &ps, const ONNXRuntime *)
ParameterDescriptionNode * addNode(ParameterDescriptionNode const &node)
void assignPCAtoTracksters(std::vector< Trackster > &tracksters, const std::vector< reco::CaloCluster > &layerClusters, const edm::ValueMap< std::pair< float, float >> &layerClustersTime, double z_limit_em, hgcal::RecHitTools const &rhTools, bool computeLocalTime=false, bool energyWeight=true, bool clean=false, int minLayer=10, int maxLayer=10)
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:526
std::unique_ptr< TracksterLinkingAlgoBase > linkingAlgo_
double phi() const
azimuthal angle of cluster centroid
Definition: CaloCluster.h:184
const edm::EDGetTokenT< std::vector< reco::CaloCluster > > clusters_token_
static constexpr int eidNFeatures_
static std::string const input
Definition: EdmProvDump.cc:50
char const * label
int iEvent
Definition: GenABIO.cc:224
T const * product() const
Definition: ESHandle.h:86
const tensorflow::Session * tfSession_
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
T sqrt(T t)
Definition: SSEVec.h:23
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:271
const edm::ESGetToken< CaloGeometry, CaloGeometryRecord > geometry_token_
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
Transition
Definition: Transition.h:12
double f[11][100]
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
static std::unique_ptr< ONNXRuntime > initializeGlobalCache(const edm::ParameterSet &iConfig)
string inputList
Definition: crabTemplate.py:6
ESHandle< T > getHandle(const ESGetToken< T, R > &iToken) const
Definition: EventSetup.h:130
double energy() const
cluster energy
Definition: CaloCluster.h:149
void printTrackstersDebug(const std::vector< Trackster > &, const char *label) const
std::vector< unsigned int > & vertices()
Definition: Trackster.h:57
void energyRegressionAndID(const std::vector< reco::CaloCluster > &layerClusters, const tensorflow::Session *, std::vector< Trackster > &result) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< float > & vertex_multiplicity()
Definition: Trackster.h:58
std::vector< edm::EDGetTokenT< std::vector< Trackster > > > tracksters_tokens_
double b
Definition: hdecay.h:120
void add(std::string const &label, ParameterSetDescription const &psetDescription)
GlobalPoint getPositionLayer(int layer, bool nose=false) const
Definition: RecHitTools.cc:152
void dumpTrackster(const Trackster &) const
void setGeometry(CaloGeometry const &)
Definition: RecHitTools.cc:79
fixed size matrix
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
double a
Definition: hdecay.h:121
Definition: Common.h:10
edm::ESGetToken< HGCalDDDConstants, IdealGeometryRecord > hdc_token_
double eta() const
pseudorapidity of cluster centroid
Definition: CaloCluster.h:181
#define get
const edm::ESGetToken< TfGraphDefWrapper, TfGraphRecord > tfDnnToken_
const std::string eidOutputNameId_
const edm::ESGetToken< Propagator, TrackingComponentsRecord > propagator_token_
void beginRun(edm::Run const &iEvent, edm::EventSetup const &es) override
const edm::EDGetTokenT< edm::ValueMap< std::pair< float, float > > > clustersTime_token_
def move(src, dest)
Definition: eostools.py:511
unsigned int lastLayerEE(bool nose=false) const
Definition: RecHitTools.h:76
Definition: Run.h:45
#define LogDebug(id)
unsigned int getLayerWithOffset(const DetId &) const
Definition: RecHitTools.cc:381