CMS 3D CMS Logo

SuperclusteringSampleDumper.cc
Go to the documentation of this file.
1 // Original Author: Theo Cuisset
2 // Created: Nov 2023
10 #include <cmath>
11 #include <vector>
12 #include <memory>
13 #include <algorithm>
14 
15 #include <TTree.h>
16 
26 
28 
33 
36 
37 using namespace ticl;
38 
39 class SuperclusteringSampleDumper : public edm::one::EDAnalyzer<edm::one::SharedResources> {
40 public:
42 
43  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
44 
45 private:
46  void beginJob() override;
47  void analyze(const edm::Event&, const edm::EventSetup&) override;
48  bool checkExplainedVarianceRatioCut(ticl::Trackster const& ts) const;
49 
56  float explVarRatioCut_energyBoundary_; // Boundary energy between low and high energy explVarRatio cut threshold
57  float explVarRatioMinimum_lowEnergy_; // Cut on explained variance ratio of tracksters to be considered as candidate, for trackster raw_energy < explVarRatioCut_energyBoundary
58  float explVarRatioMinimum_highEnergy_; // Cut on explained variance ratio of tracksters to be considered as candidate, for trackster raw_energy > explVarRatioCut_energyBoundary
59 
60  TTree* output_tree_;
62  std::unique_ptr<AbstractSuperclusteringDNNInput> dnnInput_;
63  std::vector<std::vector<float>>
64  features_; // Outer index : feature number (split into branches), inner index : inference pair index
65  std::vector<unsigned int> seedTracksterIdx_; // ID of seed trackster used for inference pair
66  std::vector<unsigned int> candidateTracksterIdx_; // ID of candidate trackster used for inference pair
67 
68  std::vector<float>
69  seedTracksterBestAssociationScore_; // Best association score of seed trackster (seedTracksterIdx) with CaloParticle
70  std::vector<long>
71  seedTracksterBestAssociation_simTsIdx_; // Index of SimTrackster that has the best association score to the seedTrackster
72  std::vector<float> seedTracksterBestAssociation_caloParticleEnergy_; // Energy of best associated CaloParticle to seed
73 
74  std::vector<float>
75  candidateTracksterBestAssociationScore_; // Best association score of candidate trackster (seedTracksterIdx) with CaloParticle
76  std::vector<long>
77  candidateTracksterBestAssociation_simTsIdx_; // Index of SimTrackster that has the best association score to the candidate
78 
79  std::vector<float>
80  candidateTracksterAssociationWithSeed_score_; // Association score of candidate trackster with the CaloParticle of seedTracksterBestAssociation_simTsIdx_
81 };
82 
84  : tracksters_clue3d_token_(consumes<std::vector<Trackster>>(ps.getParameter<edm::InputTag>("tracksters"))),
85  tsRecoToSimCP_token_(
86  consumes<ticl::RecoToSimCollectionSimTracksters>(ps.getParameter<edm::InputTag>("recoToSimAssociatorCP"))),
87  deltaEtaWindow_(ps.getParameter<double>("deltaEtaWindow")),
88  deltaPhiWindow_(ps.getParameter<double>("deltaPhiWindow")),
89  seedPtThreshold_(ps.getParameter<double>("seedPtThreshold")),
90  candidateEnergyThreshold_(ps.getParameter<double>("candidateEnergyThreshold")),
91  explVarRatioCut_energyBoundary_(ps.getParameter<double>("candidateEnergyThreshold")),
92  explVarRatioMinimum_lowEnergy_(ps.getParameter<double>("explVarRatioMinimum_lowEnergy")),
93  explVarRatioMinimum_highEnergy_(ps.getParameter<double>("explVarRatioMinimum_highEnergy")),
94  eventId_(),
95  dnnInput_(makeSuperclusteringDNNInputFromString(ps.getParameter<std::string>("dnnInputsVersion"))),
96  features_(dnnInput_->featureCount()) {
97  usesResource("TFileService");
98 }
99 
102  output_tree_ = fs->make<TTree>("superclusteringTraining", "Superclustering training samples");
103  output_tree_->Branch("Event", &eventId_);
104  output_tree_->Branch("seedTracksterIdx", &seedTracksterIdx_);
105  output_tree_->Branch("candidateTracksterIdx", &candidateTracksterIdx_);
106  output_tree_->Branch("seedTracksterBestAssociationScore", &seedTracksterBestAssociationScore_);
107  output_tree_->Branch("seedTracksterBestAssociation_simTsIdx", &seedTracksterBestAssociation_simTsIdx_);
108  output_tree_->Branch("seedTracksterBestAssociation_caloParticleEnergy",
110  output_tree_->Branch("candidateTracksterBestAssociationScore", &candidateTracksterBestAssociationScore_);
111  output_tree_->Branch("candidateTracksterBestAssociation_simTsIdx", &candidateTracksterBestAssociation_simTsIdx_);
112  output_tree_->Branch("candidateTracksterAssociationWithSeed_score", &candidateTracksterAssociationWithSeed_score_);
113  std::vector<std::string> featureNames = dnnInput_->featureNames();
114  assert(featureNames.size() == dnnInput_->featureCount());
115  for (unsigned int i = 0; i < dnnInput_->featureCount(); i++) {
116  output_tree_->Branch(("feature_" + featureNames[i]).c_str(), &features_[i]);
117  }
118 }
119 
125  float explVar_denominator =
126  std::accumulate(std::begin(ts.eigenvalues()), std::end(ts.eigenvalues()), 0.f, std::plus<float>());
127  if (explVar_denominator != 0.) {
128  float explVarRatio = ts.eigenvalues()[0] / explVar_denominator;
130  return explVarRatio >= explVarRatioMinimum_highEnergy_;
131  else
132  return explVarRatio >= explVarRatioMinimum_lowEnergy_;
133  } else
134  return false;
135 }
136 
138  eventId_ = evt.id();
139 
140  edm::Handle<std::vector<Trackster>> inputTracksters;
141  evt.getByToken(tracksters_clue3d_token_, inputTracksters);
142 
144  evt.getByToken(tsRecoToSimCP_token_, assoc_CP_recoToSim);
145 
146  /* Sorting tracksters by decreasing order of pT (out-of-place sort).
147  inputTracksters[trackstersIndicesPt[0]], ..., inputTracksters[trackstersIndicesPt[N]] makes a list of tracksters sorted by decreasing pT
148  Indices into this pT sorted collection will have the suffix _pt. Thus inputTracksters[index] and inputTracksters[trackstersIndicesPt[index_pt]] are correct
149  */
150  std::vector<unsigned int> trackstersIndicesPt(inputTracksters->size());
151  std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0);
152  std::stable_sort(
153  trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) {
154  return (*inputTracksters)[i1].raw_pt() > (*inputTracksters)[i2].raw_pt();
155  });
156 
157  // Order of loops are reversed compared to SuperclusteringProducer (here outer is seed, inner is candidate), for performance reasons.
158  // The same pairs seed-candidate should be present, just in a different order
159  // First loop on seed tracksters
160  for (unsigned int ts_seed_idx_pt = 0; ts_seed_idx_pt < inputTracksters->size(); ts_seed_idx_pt++) {
161  const unsigned int ts_seed_idx_input =
162  trackstersIndicesPt[ts_seed_idx_pt]; // Index of seed trackster in input collection (not in pT sorted collection)
163  Trackster const& ts_seed = (*inputTracksters)[ts_seed_idx_input];
164 
165  if (ts_seed.raw_pt() < seedPtThreshold_)
166  break; // All further seeds will have lower pT than threshold (due to pT sorting)
167 
168  if (!checkExplainedVarianceRatioCut(ts_seed))
169  continue;
170 
171  // Find best associated CaloParticle to the seed
172  auto seed_assocs = assoc_CP_recoToSim->find({inputTracksters, ts_seed_idx_input});
173  if (seed_assocs == assoc_CP_recoToSim->end())
174  continue; // No CaloParticle associations for the current trackster (extremly unlikely)
175  ticl::RecoToSimCollectionSimTracksters::data_type const& seed_assocWithBestScore =
176  *std::min_element(seed_assocs->val.begin(),
177  seed_assocs->val.end(),
180  // assoc_* is of type : std::pair<edmRefIntoSimTracksterCollection, std::pair<sharedEnergy, associationScore>>
181  // Best score is smallest score
182  return assoc_1.second.second < assoc_2.second.second;
183  });
184 
185  // Second loop on superclustering candidates tracksters
186  // Look only at candidate tracksters with lower pT than the seed (so all pairs are only looked at once)
187  for (unsigned int ts_cand_idx_pt = ts_seed_idx_pt + 1; ts_cand_idx_pt < inputTracksters->size(); ts_cand_idx_pt++) {
188  Trackster const& ts_cand = (*inputTracksters)[trackstersIndicesPt[ts_cand_idx_pt]];
189  // Check that the two tracksters are geometrically compatible for superclustering (using deltaEta, deltaPhi window)
190  // There is no need to run training or inference for tracksters very far apart
191  if (!(std::abs(ts_seed.barycenter().Eta() - ts_cand.barycenter().Eta()) < deltaEtaWindow_ &&
192  std::abs(deltaPhi(ts_seed.barycenter().Phi(), ts_cand.barycenter().Phi())) < deltaPhiWindow_ &&
194  continue;
195 
196  std::vector<float> features = dnnInput_->computeVector(ts_seed, ts_cand);
197  assert(features.size() == features_.size());
198  for (unsigned int feature_idx = 0; feature_idx < features_.size(); feature_idx++) {
199  features_[feature_idx].push_back(features[feature_idx]);
200  }
201  seedTracksterIdx_.push_back(trackstersIndicesPt[ts_seed_idx_pt]);
202  candidateTracksterIdx_.push_back(trackstersIndicesPt[ts_cand_idx_pt]);
203 
204  float candidateTracksterBestAssociationScore = 1.; // Best association score of candidate with any CaloParticle
205  long candidateTracksterBestAssociation_simTsIdx = -1; // Corresponding CaloParticle simTrackster index
206  float candidateTracksterAssociationWithSeed_score =
207  1.; // Association score of candidate with CaloParticle best associated with seed
208 
209  // First find associated CaloParticles with candidate
210  auto cand_assocCP = assoc_CP_recoToSim->find(
211  edm::Ref<ticl::TracksterCollection>(inputTracksters, trackstersIndicesPt[ts_cand_idx_pt]));
212  if (cand_assocCP != assoc_CP_recoToSim->end()) {
213  // find the association with best score
214  ticl::RecoToSimCollectionSimTracksters::data_type const& cand_assocWithBestScore =
215  *std::min_element(cand_assocCP->val.begin(),
216  cand_assocCP->val.end(),
219  // assoc_* is of type : std::pair<edmRefIntoSimTracksterCollection, std::pair<sharedEnergy, associationScore>>
220  return assoc_1.second.second < assoc_2.second.second;
221  });
222  candidateTracksterBestAssociationScore = cand_assocWithBestScore.second.second;
223  candidateTracksterBestAssociation_simTsIdx = cand_assocWithBestScore.first.key();
224 
225  // find the association score with the same CaloParticle as the seed
226  auto cand_assocWithSeedCP =
227  std::find_if(cand_assocCP->val.begin(),
228  cand_assocCP->val.end(),
229  [&seed_assocWithBestScore](ticl::RecoToSimCollectionSimTracksters::data_type const& assoc) {
230  // assoc is of type : std::pair<edmRefIntoSimTracksterCollection, std::pair<sharedEnergy, associationScore>>
231  return assoc.first == seed_assocWithBestScore.first;
232  });
233  if (cand_assocWithSeedCP != cand_assocCP->val.end()) {
234  candidateTracksterAssociationWithSeed_score = cand_assocWithSeedCP->second.second;
235  }
236  }
237 
238  seedTracksterBestAssociationScore_.push_back(seed_assocWithBestScore.second.second);
239  seedTracksterBestAssociation_simTsIdx_.push_back(seed_assocWithBestScore.first.key());
240  seedTracksterBestAssociation_caloParticleEnergy_.push_back(seed_assocWithBestScore.first->regressed_energy());
241 
242  candidateTracksterBestAssociationScore_.push_back(candidateTracksterBestAssociationScore);
243  candidateTracksterBestAssociation_simTsIdx_.push_back(candidateTracksterBestAssociation_simTsIdx);
244 
245  candidateTracksterAssociationWithSeed_score_.push_back(candidateTracksterAssociationWithSeed_score);
246  }
247  }
248 
249  output_tree_->Fill();
250  for (auto& feats : features_)
251  feats.clear();
252  seedTracksterIdx_.clear();
253  candidateTracksterIdx_.clear();
260 }
261 
264  desc.add<edm::InputTag>("tracksters", edm::InputTag("ticlTrackstersCLUE3DHigh"))
265  ->setComment("Input trackster collection, same as what is used for superclustering inference.");
266  desc.add<edm::InputTag>("recoToSimAssociatorCP",
267  edm::InputTag("tracksterSimTracksterAssociationLinkingbyCLUE3D", "recoToSim"));
268  desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true),
269  edm::allowedValues<std::string>("v1", "v2"))
270  ->setComment(
271  "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN.");
272  // Cuts are intentionally looser than those used for inference in TracksterLinkingBySuperClustering.cpp
273  desc.add<double>("deltaEtaWindow", 0.2)
274  ->setComment(
275  "Size of delta eta window to consider for superclustering. Seed-candidate pairs outside this window "
276  "are not considered for DNN inference.");
277  desc.add<double>("deltaPhiWindow", 0.7)
278  ->setComment(
279  "Size of delta phi window to consider for superclustering. Seed-candidate pairs outside this window "
280  "are not considered for DNN inference.");
281  desc.add<double>("seedPtThreshold", 3.)
282  ->setComment("Minimum transverse momentum of trackster to be considered as seed of a supercluster");
283  desc.add<double>("candidateEnergyThreshold", 1.5)
284  ->setComment("Minimum energy of trackster to be considered as candidate for superclustering");
285  desc.add<double>("explVarRatioCut_energyBoundary", 50.)
286  ->setComment("Boundary energy between low and high energy explVarRatio cut threshold");
287  desc.add<double>("explVarRatioMinimum_lowEnergy", 0.85)
288  ->setComment(
289  "Cut on explained variance ratio of tracksters to be considered as candidate, "
290  "for trackster raw_energy < explVarRatioCut_energyBoundary");
291  desc.add<double>("explVarRatioMinimum_highEnergy", 0.9)
292  ->setComment(
293  "Cut on explained variance ratio of tracksters to be considered as candidate, "
294  "for trackster raw_energy > explVarRatioCut_energyBoundary");
295  descriptions.add("superclusteringSampleDumper", desc);
296 }
297 
std::vector< unsigned int > candidateTracksterIdx_
const float raw_pt() const
Definition: Trackster.h:156
void analyze(const edm::Event &, const edm::EventSetup &) override
const Vector & barycenter() const
Definition: Trackster.h:159
std::vector< float > seedTracksterBestAssociationScore_
Tag::data_type data_type
insert data type
const edm::EDGetTokenT< std::vector< Trackster > > tracksters_clue3d_token_
std::vector< unsigned int > seedTracksterIdx_
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:526
std::vector< float > candidateTracksterAssociationWithSeed_score_
assert(be >=bs)
example_stream void analyze(const edm::Event &, const edm::EventSetup &) override
const_iterator find(const key_type &k) const
find element with specified reference key
std::vector< float > seedTracksterBestAssociation_caloParticleEnergy_
void beginJob()
Definition: Breakpoints.cc:14
const_iterator end() const
last iterator over the map (read only)
std::vector< std::vector< float > > features_
std::vector< long > seedTracksterBestAssociation_simTsIdx_
std::vector< float > features(const reco::PreId &ecal, const reco::PreId &hcal, double rho, const reco::BeamSpot &spot, noZS::EcalClusterLazyTools &ecalTools)
const float raw_energy() const
Definition: Trackster.h:154
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
SuperclusteringSampleDumper(const edm::ParameterSet &)
const edm::EDGetTokenT< ticl::RecoToSimCollectionSimTracksters > tsRecoToSimCP_token_
double f[11][100]
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
edm::EventID id() const
Definition: EventBase.h:63
bool checkExplainedVarianceRatioCut(ticl::Trackster const &ts) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< float > candidateTracksterBestAssociationScore_
void add(std::string const &label, ParameterSetDescription const &psetDescription)
std::unique_ptr< AbstractSuperclusteringDNNInput > makeSuperclusteringDNNInputFromString(std::string dnnVersion)
HLT enums.
Definition: Common.h:10
std::vector< long > candidateTracksterBestAssociation_simTsIdx_
std::unique_ptr< AbstractSuperclusteringDNNInput > dnnInput_
const std::array< float, 3 > & eigenvalues() const
Definition: Trackster.h:160