CMS 3D CMS Logo

TracksterLinksProducer.cc
Go to the documentation of this file.
1 // Author: Felice Pantaleo, Wahid Redjeb (CERN) - felice.pantaleo@cern.ch, wahid.redjeb@cern.ch
2 // Date: 12/2023
3 #include <memory> // unique_ptr
16 
18 
23 
28 
32 
35 
38 
42 
44 
45 #include "TrackstersPCA.h"
46 
47 using namespace ticl;
48 
50 public:
51  explicit TracksterLinksProducer(const edm::ParameterSet &ps);
53  void produce(edm::Event &, const edm::EventSetup &) override;
54  static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
55 
56  void beginRun(edm::Run const &iEvent, edm::EventSetup const &es) override;
57 
58 private:
59  void printTrackstersDebug(const std::vector<Trackster> &, const char *label) const;
60  void dumpTrackster(const Trackster &) const;
61  void energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
62  const tensorflow::Session *,
63  std::vector<Trackster> &result) const;
64 
65  std::unique_ptr<TracksterLinkingAlgoBase> linkingAlgo_;
66 
67  std::vector<edm::EDGetTokenT<std::vector<Trackster>>> tracksters_tokens_;
70 
71  const bool regressionAndPid_;
74  const tensorflow::Session *tfSession_;
78  const float eidMinClusterEnergy_;
79  const int eidNLayers_;
80  const int eidNClusters_;
81  static constexpr int eidNFeatures_ = 3;
82  tensorflow::Session *eidSession_;
83 
84  std::vector<edm::EDGetTokenT<std::vector<float>>> original_masks_tokens_;
85 
89 
95 };
96 
98  : clusters_token_(consumes<std::vector<reco::CaloCluster>>(ps.getParameter<edm::InputTag>("layer_clusters"))),
99  clustersTime_token_(
100  consumes<edm::ValueMap<std::pair<float, float>>>(ps.getParameter<edm::InputTag>("layer_clustersTime"))),
101  regressionAndPid_(ps.getParameter<bool>("regressionAndPid")),
102  tfDnnLabel_(ps.getParameter<std::string>("tfDnnLabel")),
103  tfDnnToken_(esConsumes(edm::ESInputTag("", tfDnnLabel_))),
104  tfSession_(nullptr),
105  eidInputName_(ps.getParameter<std::string>("eid_input_name")),
106  eidOutputNameEnergy_(ps.getParameter<std::string>("eid_output_name_energy")),
107  eidOutputNameId_(ps.getParameter<std::string>("eid_output_name_id")),
108  eidMinClusterEnergy_(ps.getParameter<double>("eid_min_cluster_energy")),
109  eidNLayers_(ps.getParameter<int>("eid_n_layers")),
110  eidNClusters_(ps.getParameter<int>("eid_n_clusters")),
111  eidSession_(nullptr),
112  geometry_token_(esConsumes<CaloGeometry, CaloGeometryRecord, edm::Transition::BeginRun>()),
113  detector_(ps.getParameter<std::string>("detector")),
114  propName_(ps.getParameter<std::string>("propagator")),
115  bfield_token_(esConsumes<MagneticField, IdealMagneticFieldRecord, edm::Transition::BeginRun>()),
116  propagator_token_(
117  esConsumes<Propagator, TrackingComponentsRecord, edm::Transition::BeginRun>(edm::ESInputTag("", propName_))) {
118  // Loop over the edm::VInputTag and append the token to tracksters_tokens_
119  for (auto const &tag : ps.getParameter<std::vector<edm::InputTag>>("tracksters_collections")) {
120  tracksters_tokens_.emplace_back(consumes<std::vector<Trackster>>(tag));
121  }
122  //Loop over the edm::VInputTag of masks and append the token to original_masks_tokens_
123  for (auto const &tag : ps.getParameter<std::vector<edm::InputTag>>("original_masks")) {
124  original_masks_tokens_.emplace_back(consumes<std::vector<float>>(tag));
125  }
126 
127  // New trackster collection after linking
128  produces<std::vector<Trackster>>();
129 
130  // Links
131  produces<std::vector<std::vector<unsigned int>>>();
132  // LayerClusters Mask
133  produces<std::vector<float>>();
134 
135  auto linkingPSet = ps.getParameter<edm::ParameterSet>("linkingPSet");
136  auto algoType = linkingPSet.getParameter<std::string>("type");
137 
138  if (algoType == "Skeletons") {
139  std::string detectorName_ = (detector_ == "HFNose") ? "HGCalHFNoseSensitive" : "HGCalEESensitive";
140  hdc_token_ = esConsumes<HGCalDDDConstants, IdealGeometryRecord, edm::Transition::BeginRun>(
141  edm::ESInputTag("", detectorName_));
142  }
143 
144  linkingAlgo_ = TracksterLinkingPluginFactory::get()->create(algoType, linkingPSet, consumesCollector());
145 }
146 
149  hgcons_ = hdc.product();
150 
153 
156 
157  linkingAlgo_->initialize(hgcons_, rhtools_, bfield, propagator);
158 };
159 
161  auto e_over_h = (t.raw_em_pt() / ((t.raw_pt() - t.raw_em_pt()) != 0. ? (t.raw_pt() - t.raw_em_pt()) : 1.));
162  LogDebug("TracksterLinksProducer")
163  << "\nTrackster raw_pt: " << t.raw_pt() << " raw_em_pt: " << t.raw_em_pt() << " eoh: " << e_over_h
164  << " barycenter: " << t.barycenter() << " eta,phi (baricenter): " << t.barycenter().eta() << ", "
165  << t.barycenter().phi() << " eta,phi (eigen): " << t.eigenvectors(0).eta() << ", " << t.eigenvectors(0).phi()
166  << " pt(eigen): " << std::sqrt(t.eigenvectors(0).Unit().perp2()) * t.raw_energy() << " seedID: " << t.seedID()
167  << " seedIndex: " << t.seedIndex() << " size: " << t.vertices().size() << " average usage: "
168  << (std::accumulate(std::begin(t.vertex_multiplicity()), std::end(t.vertex_multiplicity()), 0.) /
169  (float)t.vertex_multiplicity().size())
170  << " raw_energy: " << t.raw_energy() << " regressed energy: " << t.regressed_energy()
171  << " probs(ga/e/mu/np/cp/nh/am/unk): ";
172  for (auto const &p : t.id_probabilities()) {
173  LogDebug("TracksterLinksProducer") << std::fixed << p << " ";
174  }
175  LogDebug("TracksterLinksProducer") << " sigmas: ";
176  for (auto const &s : t.sigmas()) {
177  LogDebug("TracksterLinksProducer") << s << " ";
178  }
179  LogDebug("TracksterLinksProducer") << std::endl;
180 }
181 
182 void TracksterLinksProducer::energyRegressionAndID(const std::vector<reco::CaloCluster> &layerClusters,
183  const tensorflow::Session *eidSession,
184  std::vector<Trackster> &tracksters) const {
185  // Energy regression and particle identification strategy:
186  //
187  // 1. Set default values for regressed energy and particle id for each trackster.
188  // 2. Store indices of tracksters whose total sum of cluster energies is above the
189  // eidMinClusterEnergy_ (GeV) threshold. Inference is not applied for soft tracksters.
190  // 3. When no trackster passes the selection, return.
191  // 4. Create input and output tensors. The batch dimension is determined by the number of
192  // selected tracksters.
193  // 5. Fill input tensors with layer cluster features. Per layer, clusters are ordered descending
194  // by energy. Given that tensor data is contiguous in memory, we can use pointer arithmetic to
195  // fill values, even with batching.
196  // 6. Zero-fill features for empty clusters in each layer.
197  // 7. Batched inference.
198  // 8. Assign the regressed energy and id probabilities to each trackster.
199  //
200  // Indices used throughout this method:
201  // i -> batch element / trackster
202  // j -> layer
203  // k -> cluster
204  // l -> feature
205 
206  // do nothing when no trackster passes the selection (3)
207  int batchSize = (int)tracksters.size();
208  if (batchSize == 0) {
209  return;
210  }
211 
212  for (auto &t : tracksters) {
213  t.setRegressedEnergy(0.f);
214  t.zeroProbabilities();
215  }
216 
217  // create input and output tensors (4)
218  tensorflow::TensorShape shape({batchSize, eidNLayers_, eidNClusters_, eidNFeatures_});
219  tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
221  static constexpr int inputDimension = 4;
222 
223  std::vector<tensorflow::Tensor> outputs;
224  std::vector<std::string> outputNames;
225  if (!eidOutputNameEnergy_.empty()) {
227  }
228  if (!eidOutputNameId_.empty()) {
229  outputNames.push_back(eidOutputNameId_);
230  }
231 
232  // fill input tensor (5)
233  for (int i = 0; i < batchSize; i++) {
234  const Trackster &trackster = tracksters[i];
235 
236  // per layer, we only consider the first eidNClusters_ clusters in terms of
237  // energy, so in order to avoid creating large / nested structures to do
238  // the sorting for an unknown number of total clusters, create a sorted
239  // list of layer cluster indices to keep track of the filled clusters
240  std::vector<int> clusterIndices(trackster.vertices().size());
241  for (int k = 0; k < (int)trackster.vertices().size(); k++) {
242  clusterIndices[k] = k;
243  }
244  sort(clusterIndices.begin(), clusterIndices.end(), [&layerClusters, &trackster](const int &a, const int &b) {
245  return layerClusters[trackster.vertices(a)].energy() > layerClusters[trackster.vertices(b)].energy();
246  });
247 
248  // keep track of the number of seen clusters per layer
249  std::vector<int> seenClusters(eidNLayers_);
250 
251  // loop through clusters by descending energy
252  for (const int &k : clusterIndices) {
253  // get features per layer and cluster and store the values directly in the input tensor
254  const reco::CaloCluster &cluster = layerClusters[trackster.vertices(k)];
255  int j = rhtools_.getLayerWithOffset(cluster.hitsAndFractions()[0].first) - 1;
256  if (j < eidNLayers_ && seenClusters[j] < eidNClusters_) {
257  // get the pointer to the first feature value for the current batch, layer and cluster
258  float *features = &input.tensor<float, inputDimension>()(i, j, seenClusters[j], 0);
259 
260  // fill features
261  *(features++) = float(cluster.energy() / float(trackster.vertex_multiplicity(k)));
262  *(features++) = float(std::abs(cluster.eta()));
263  *(features) = float(cluster.phi());
264 
265  // increment seen clusters
266  seenClusters[j]++;
267  }
268  }
269 
270  // zero-fill features of empty clusters in each layer (6)
271  for (int j = 0; j < eidNLayers_; j++) {
272  for (int k = seenClusters[j]; k < eidNClusters_; k++) {
273  float *features = &input.tensor<float, inputDimension>()(i, j, k, 0);
274  for (int l = 0; l < eidNFeatures_; l++) {
275  *(features++) = 0.f;
276  }
277  }
278  }
279  }
280 
281  // run the inference (7)
283 
284  // store regressed energy per trackster (8)
285  if (!eidOutputNameEnergy_.empty()) {
286  // get the pointer to the energy tensor, dimension is batch x 1
287  float *energy = outputs[0].flat<float>().data();
288 
289  for (int i = 0; i < batchSize; ++i) {
290  float regressedEnergy =
291  tracksters[i].raw_energy() > eidMinClusterEnergy_ ? energy[i] : tracksters[i].raw_energy();
292  tracksters[i].setRegressedEnergy(regressedEnergy);
293  }
294  }
295 
296  // store id probabilities per trackster (8)
297  if (!eidOutputNameId_.empty()) {
298  // get the pointer to the id probability tensor, dimension is batch x id_probabilities.size()
299  int probsIdx = !eidOutputNameEnergy_.empty();
300  float *probs = outputs[probsIdx].flat<float>().data();
301  int probsNumber = tracksters[0].id_probabilities().size();
302  for (int i = 0; i < batchSize; ++i) {
303  tracksters[i].setProbabilities(&probs[i * probsNumber]);
304  }
305  }
306 }
307 
309  auto resultTracksters = std::make_unique<std::vector<Trackster>>();
310 
311  auto linkedResultTracksters = std::make_unique<std::vector<std::vector<unsigned int>>>();
312 
313  const auto &layerClusters = evt.get(clusters_token_);
314  const auto &layerClustersTimes = evt.get(clustersTime_token_);
315 
316  tfSession_ = es.getData(tfDnnToken_).getSession();
317  // loop over the original_masks_tokens_ and get the original masks collections and multiply them
318  // to get the global mask
319  std::vector<float> original_global_mask(layerClusters.size(), 1.f);
320  for (unsigned int i = 0; i < original_masks_tokens_.size(); ++i) {
321  const auto &tmp_mask = evt.get(original_masks_tokens_[i]);
322  for (unsigned int j = 0; j < tmp_mask.size(); ++j) {
323  original_global_mask[j] *= tmp_mask[j];
324  }
325  }
326 
327  auto resultMask = std::make_unique<std::vector<float>>(original_global_mask);
328 
329  std::vector<edm::Handle<std::vector<Trackster>>> tracksters_h(tracksters_tokens_.size());
330  MultiVectorManager<Trackster> trackstersManager;
331  for (unsigned int i = 0; i < tracksters_tokens_.size(); ++i) {
332  evt.getByToken(tracksters_tokens_[i], tracksters_h[i]);
333  //Fill MultiVectorManager
334  trackstersManager.addVector(*tracksters_h[i]);
335  }
336 
337  // Linking
338  const typename TracksterLinkingAlgoBase::Inputs input(evt, es, layerClusters, layerClustersTimes, trackstersManager);
339  std::vector<std::vector<unsigned int>> linkedTracksterIdToInputTracksterId;
340 
341  // LinkTracksters will produce a vector of vector of indices of tracksters that:
342  // 1) are linked together if more than one
343  // 2) are isolated if only one
344  // Result tracksters contains the final version of the trackster collection
345  // linkedTrackstersToInputTrackstersMap contains the mapping between the linked tracksters and the input tracksters
346  linkingAlgo_->linkTracksters(input, *resultTracksters, *linkedResultTracksters, linkedTracksterIdToInputTracksterId);
347 
348  // Now we need to remove the tracksters that are not linked
349  // We need to emplace_back in the resultTracksters only the tracksters that are linked
350 
351  for (auto const &resultTrackster : *resultTracksters) {
352  for (auto const &clusterIndex : resultTrackster.vertices()) {
353  (*resultMask)[clusterIndex] = 0.f;
354  }
355  }
356 
357  if (regressionAndPid_)
358  energyRegressionAndID(layerClusters, tfSession_, *resultTracksters);
359 
361  *resultTracksters, layerClusters, layerClustersTimes, rhtools_.getPositionLayer(rhtools_.lastLayerEE()).z(), true);
362 
363  evt.put(std::move(linkedResultTracksters));
364  evt.put(std::move(resultMask));
365  evt.put(std::move(resultTracksters));
366 }
367 
368 void TracksterLinksProducer::printTrackstersDebug(const std::vector<Trackster> &tracksters, const char *label) const {
369  int counter = 0;
370  LogDebug("TracksterLinksProducer").log([&](auto &log) {
371  for (auto const &t : tracksters) {
372  log << counter++ << " TracksterLinksProducer (" << label << ") obj barycenter: " << t.barycenter()
373  << " eta,phi (baricenter): " << t.barycenter().eta() << ", " << t.barycenter().phi()
374  << " eta,phi (eigen): " << t.eigenvectors(0).eta() << ", " << t.eigenvectors(0).phi()
375  << " pt(eigen): " << std::sqrt(t.eigenvectors(0).Unit().perp2()) * t.raw_energy() << " seedID: " << t.seedID()
376  << " seedIndex: " << t.seedIndex() << " size: " << t.vertices().size() << " average usage: "
377  << (std::accumulate(std::begin(t.vertex_multiplicity()), std::end(t.vertex_multiplicity()), 0.) /
378  (float)t.vertex_multiplicity().size())
379  << " raw_energy: " << t.raw_energy() << " regressed energy: " << t.regressed_energy()
380  << " probs(ga/e/mu/np/cp/nh/am/unk): ";
381  for (auto const &p : t.id_probabilities()) {
382  log << std::fixed << p << " ";
383  }
384  log << " sigmas: ";
385  for (auto const &s : t.sigmas()) {
386  log << s << " ";
387  }
388  log << "\n";
389  }
390  });
391 }
392 
395  edm::ParameterSetDescription linkingDesc;
396  linkingDesc.addNode(edm::PluginDescription<TracksterLinkingPluginFactory>("type", "Skeletons", true));
397 
398  desc.add<edm::ParameterSetDescription>("linkingPSet", linkingDesc);
399  desc.add<std::vector<edm::InputTag>>("tracksters_collections", {edm::InputTag("ticlTrackstersCLUE3DHigh")});
400  desc.add<std::vector<edm::InputTag>>("original_masks",
401  {edm::InputTag("hgcalMergeLayerClusters", "InitialLayerClustersMask")});
402  desc.add<edm::InputTag>("layer_clusters", edm::InputTag("hgcalMergeLayerClusters"));
403  desc.add<edm::InputTag>("layer_clustersTime", edm::InputTag("hgcalMergeLayerClusters", "timeLayerCluster"));
404  desc.add<bool>("regressionAndPid", false);
405  desc.add<std::string>("tfDnnLabel", "tracksterSelectionTf");
406  desc.add<std::string>("eid_input_name", "input");
407  desc.add<std::string>("eid_output_name_energy", "output/regressed_energy");
408  desc.add<std::string>("eid_output_name_id", "output/id_probabilities");
409  desc.add<double>("eid_min_cluster_energy", 2.5);
410  desc.add<int>("eid_n_layers", 50);
411  desc.add<int>("eid_n_clusters", 10);
412  desc.add<std::string>("detector", "HGCAL");
413  desc.add<std::string>("propagator", "PropagatorWithMaterial");
414  descriptions.add("tracksterLinksProducer", desc);
415 }
416 
const HGCalDDDConstants * hgcons_
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
ESGetTokenH3DDVariant esConsumes(std::string const &Record, edm::ConsumesCollector &)
Definition: DeDxTools.cc:283
std::vector< NamedTensor > NamedTensorList
Definition: TensorFlow.h:31
T getParameter(std::string const &) const
Definition: ParameterSet.h:307
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:133
const std::vector< std::pair< DetId, float > > & hitsAndFractions() const
Definition: CaloCluster.h:210
void produce(edm::Event &, const edm::EventSetup &) override
T const & getData(const ESGetToken< T, R > &iToken) const noexcept(false)
Definition: EventSetup.h:119
bool get(ProductID const &oid, Handle< PROD > &result) const
Definition: Event.h:344
tensorflow::Session * eidSession_
TracksterLinksProducer(const edm::ParameterSet &ps)
const edm::ESGetToken< MagneticField, IdealMagneticFieldRecord > bfield_token_
std::vector< edm::EDGetTokenT< std::vector< float > > > original_masks_tokens_
const std::string eidOutputNameEnergy_
ParameterDescriptionNode * addNode(ParameterDescriptionNode const &node)
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:528
std::unique_ptr< TracksterLinkingAlgoBase > linkingAlgo_
double phi() const
azimuthal angle of cluster centroid
Definition: CaloCluster.h:184
const edm::EDGetTokenT< std::vector< reco::CaloCluster > > clusters_token_
static constexpr int eidNFeatures_
static std::string const input
Definition: EdmProvDump.cc:50
char const * label
int iEvent
Definition: GenABIO.cc:224
T const * product() const
Definition: ESHandle.h:86
const tensorflow::Session * tfSession_
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
T sqrt(T t)
Definition: SSEVec.h:23
void run(Session *session, const NamedTensorList &inputs, const std::vector< std::string > &outputNames, std::vector< Tensor > *outputs, const thread::ThreadPoolOptions &threadPoolOptions)
Definition: TensorFlow.cc:281
const edm::ESGetToken< CaloGeometry, CaloGeometryRecord > geometry_token_
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
Transition
Definition: Transition.h:12
double f[11][100]
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
string inputList
Definition: crabTemplate.py:6
ESHandle< T > getHandle(const ESGetToken< T, R > &iToken) const
Definition: EventSetup.h:130
double energy() const
cluster energy
Definition: CaloCluster.h:149
void printTrackstersDebug(const std::vector< Trackster > &, const char *label) const
std::vector< unsigned int > & vertices()
Definition: Trackster.h:57
void energyRegressionAndID(const std::vector< reco::CaloCluster > &layerClusters, const tensorflow::Session *, std::vector< Trackster > &result) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< float > & vertex_multiplicity()
Definition: Trackster.h:58
std::vector< edm::EDGetTokenT< std::vector< Trackster > > > tracksters_tokens_
double b
Definition: hdecay.h:120
void add(std::string const &label, ParameterSetDescription const &psetDescription)
GlobalPoint getPositionLayer(int layer, bool nose=false) const
Definition: RecHitTools.cc:152
void dumpTrackster(const Trackster &) const
void setGeometry(CaloGeometry const &)
Definition: RecHitTools.cc:79
fixed size matrix
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
double a
Definition: hdecay.h:121
Definition: Common.h:10
edm::ESGetToken< HGCalDDDConstants, IdealGeometryRecord > hdc_token_
void assignPCAtoTracksters(std::vector< Trackster > &, const std::vector< reco::CaloCluster > &, const edm::ValueMap< std::pair< float, float >> &, double, bool computeLocalTime=false, bool energyWeight=true)
double eta() const
pseudorapidity of cluster centroid
Definition: CaloCluster.h:181
#define get
const edm::ESGetToken< TfGraphDefWrapper, TfGraphRecord > tfDnnToken_
const std::string eidOutputNameId_
const edm::ESGetToken< Propagator, TrackingComponentsRecord > propagator_token_
void beginRun(edm::Run const &iEvent, edm::EventSetup const &es) override
const edm::EDGetTokenT< edm::ValueMap< std::pair< float, float > > > clustersTime_token_
def move(src, dest)
Definition: eostools.py:511
unsigned int lastLayerEE(bool nose=false) const
Definition: RecHitTools.h:76
Definition: Run.h:45
#define LogDebug(id)
unsigned int getLayerWithOffset(const DetId &) const
Definition: RecHitTools.cc:381