CMS 3D CMS Logo

DeepDoubleXONNXJetTagsProducer.cc
Go to the documentation of this file.
3 
6 
8 
11 
13 
15 
17 
18 #include <algorithm>
19 #include <iostream>
20 #include <fstream>
21 
22 using namespace cms::Ort;
23 
24 class DeepDoubleXONNXJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
25 public:
28 
30 
31  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
32  static void globalEndJob(const ONNXRuntime*);
33 
34 private:
35  typedef std::vector<reco::DeepDoubleXTagInfo> TagInfoCollection;
37 
38  void produce(edm::Event&, const edm::EventSetup&) override;
39 
40  void make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo& taginfo);
41 
43  std::vector<std::string> flav_names_;
44  std::vector<std::string> input_names_;
45  std::vector<std::string> output_names_;
47 
49  unsigned n_npf_, n_features_npf_;
50  unsigned n_cpf_, n_features_cpf_;
51  unsigned n_sv_, n_features_sv_;
52  unsigned kGlobal, kNeutralCandidates, kChargedCandidates, kVertices;
53  std::vector<unsigned> input_sizes_;
54 
55  // hold the input data
57 
58  bool debug_ = false;
59 };
60 
62  const ONNXRuntime* cache)
63  : src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
64  flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
65  input_names_(iConfig.getParameter<std::vector<std::string>>("input_names")),
66  output_names_(iConfig.getParameter<std::vector<std::string>>("output_names")),
67  version_(iConfig.getParameter<std::string>("version")),
68  debug_(iConfig.getUntrackedParameter<bool>("debugMode", false)) {
69  // get output names from flav_names
70  for (const auto& flav_name : flav_names_) {
71  produces<JetTagCollection>(flav_name);
72  }
73 
74  if (version_ == "V2") {
76  n_npf_ = 60;
77  n_features_npf_ = 8;
78  n_cpf_ = 40;
79  n_features_cpf_ = 21;
80  n_sv_ = 5;
81  n_features_sv_ = 7;
83  kGlobal = 0;
86  kVertices = 3;
87  } else {
88  n_features_global_ = 27;
89  n_cpf_ = 60;
90  n_features_cpf_ = 8;
91  n_sv_ = 5;
92  n_features_sv_ = 2;
94  kGlobal = 0;
96  kVertices = 2;
97  }
98 
99  assert(input_names_.size() == input_sizes_.size());
100 }
101 
103 
106  desc.add<edm::InputTag>("src", edm::InputTag("pfDeepDoubleXTagInfos"));
107  desc.add<std::vector<std::string>>("input_names", {"input_1", "input_2", "input_3"});
108  desc.add<std::vector<std::string>>("output_names", {});
109  desc.add<std::string>("version", "V1");
110 
111  using FIP = edm::FileInPath;
115  using PDVersion = edm::ParameterDescription<std::string>;
116  auto flavorCases = [&]() {
117  return "BvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHbb"}, true) and
118  PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDB.onnx"), true)) or
119  "CvL" >> (PDPSD("flav_names", std::vector<std::string>{"probQCD", "probHcc"}, true) and
120  PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDC.onnx"), true)) or
121  "CvB" >> (PDPSD("flav_names", std::vector<std::string>{"probHbb", "probHcc"}, true) and
122  PDFIP("model_path", FIP("RecoBTag/Combined/data/DeepDoubleX/94X/V01/DDCvB.onnx"), true));
123  };
124  auto descBvL(desc);
125  descBvL.ifValue(edm::ParameterDescription<std::string>("flavor", "BvL", true), flavorCases());
126  descriptions.add("pfDeepDoubleBvLJetTags", descBvL);
127 
128  auto descCvL(desc);
129  descCvL.ifValue(edm::ParameterDescription<std::string>("flavor", "CvL", true), flavorCases());
130  descriptions.add("pfDeepDoubleCvLJetTags", descCvL);
131 
132  auto descCvB(desc);
133  descCvB.ifValue(edm::ParameterDescription<std::string>("flavor", "CvB", true), flavorCases());
134  descriptions.add("pfDeepDoubleCvBJetTags", descCvB);
135 }
136 
137 std::unique_ptr<ONNXRuntime> DeepDoubleXONNXJetTagsProducer::initializeGlobalCache(const edm::ParameterSet& iConfig) {
138  return std::make_unique<ONNXRuntime>(iConfig.getParameter<edm::FileInPath>("model_path").fullPath());
139 }
140 
142 
145  iEvent.getByToken(src_, tag_infos);
146 
147  std::vector<std::unique_ptr<JetTagCollection>> output_tags;
148  if (!tag_infos->empty()) {
149  // initialize output collection
150  auto jet_ref = tag_infos->begin()->jet();
151  auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
152  for (std::size_t i = 0; i < flav_names_.size(); i++) {
153  output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
154  }
155 
156  // only need to run on jets with non-empty features
157  auto batch_size = std::count_if(
158  tag_infos->begin(), tag_infos->end(), [](const auto& taginfo) { return !taginfo.features().empty(); });
159 
160  std::vector<float> etas_debug;
161  std::vector<float> outputs;
162  if (batch_size > 0) {
163  // init data storage
164  data_.clear();
165  for (const auto& len : input_sizes_) {
166  data_.emplace_back(batch_size * len, 0);
167  }
168 
169  // convert inputs
170  unsigned idx = 0;
171  for (const auto& taginfo : *tag_infos) {
172  if (!taginfo.features().empty()) {
173  make_inputs(idx, taginfo);
174  if (debug_) {
175  etas_debug.push_back(taginfo.jet()->eta());
176  }
177  ++idx;
178  }
179  }
180 
181  std::sort(input_names_.begin(), input_names_.end()); // input_names order on input is not preserved
182  // run prediction
183  outputs = globalCache()->run(input_names_, data_, {}, output_names_, batch_size)[0];
184 
185  if (debug_) {
186  // Dump inputs to file
187  std::ofstream outfile;
188  outfile.open("test.txt", std::ios_base::app);
189  outfile << iEvent.id().event() << std::endl;
190  outfile << batch_size << std::endl;
191  for (float x : etas_debug)
192  outfile << x << ' ';
193  outfile << std::endl;
194  int _i = 0;
195  for (const std::vector<float>& v : data_) {
196  outfile << "input_" << _i << std::endl;
197  for (float x : v)
198  outfile << x << ' ';
199  outfile << std::endl;
200  _i = _i + 1;
201  }
202  outfile << "outputs" << std::endl;
203  for (float x : outputs)
204  outfile << x << ' ';
205  outfile << std::endl;
206  }
207 
208  assert(outputs.size() == flav_names_.size() * batch_size);
209  }
210 
211  // get the outputs
212  unsigned i_output = 0;
213  for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
214  const auto& taginfo = tag_infos->at(jet_n);
215  const auto& jet_ref = taginfo.jet();
216  for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
217  if (!taginfo.features().empty()) {
218  (*(output_tags[flav_n]))[jet_ref] = outputs[i_output];
219  ++i_output;
220  } else {
221  (*(output_tags[flav_n]))[jet_ref] = -1.;
222  }
223  }
224  }
225  } else {
226  // create empty output collection
227  for (std::size_t i = 0; i < flav_names_.size(); i++) {
228  output_tags.emplace_back(std::make_unique<JetTagCollection>());
229  }
230  }
231 
232  // put into the event
233  for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
234  iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
235  }
236 }
237 
239  const auto& features = taginfo.features();
240  float* ptr = nullptr;
241  const float* start = nullptr;
242  unsigned offset = 0;
243 
244  // DoubleB features
245  offset = i_jet * input_sizes_[kGlobal];
246  ptr = &data_[kGlobal][offset];
247  start = ptr;
248  const auto& tag_info_features = features.tag_info_features;
249  *ptr = tag_info_features.jetNTracks;
250  if (version_ == "V1") {
251  *(++ptr) = tag_info_features.jetNSecondaryVertices;
252  }
253  *(++ptr) = tag_info_features.tau1_trackEtaRel_0;
254  *(++ptr) = tag_info_features.tau1_trackEtaRel_1;
255  if (version_ == "V1") {
256  *(++ptr) = tag_info_features.tau1_trackEtaRel_2;
257  *(++ptr) = tag_info_features.tau2_trackEtaRel_0;
258  *(++ptr) = tag_info_features.tau2_trackEtaRel_1;
259  *(++ptr) = tag_info_features.tau2_trackEtaRel_2;
260  *(++ptr) = tag_info_features.tau1_flightDistance2dSig;
261  *(++ptr) = tag_info_features.tau2_flightDistance2dSig;
262  }
263  *(++ptr) = tag_info_features.tau1_vertexDeltaR;
264  *(++ptr) = tag_info_features.tau1_vertexEnergyRatio;
265  if (version_ == "V1") {
266  *(++ptr) = tag_info_features.tau2_vertexEnergyRatio;
267  *(++ptr) = tag_info_features.tau1_vertexMass;
268  *(++ptr) = tag_info_features.tau2_vertexMass;
269  *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_0;
270  *(++ptr) = tag_info_features.trackSip2dSigAboveBottom_1;
271  *(++ptr) = tag_info_features.trackSip2dSigAboveCharm;
272  *(++ptr) = tag_info_features.trackSip3dSig_0;
273  *(++ptr) = tag_info_features.tau1_trackSip3dSig_0;
274  *(++ptr) = tag_info_features.tau1_trackSip3dSig_1;
275  *(++ptr) = tag_info_features.trackSip3dSig_1;
276  *(++ptr) = tag_info_features.tau2_trackSip3dSig_0;
277  *(++ptr) = tag_info_features.tau2_trackSip3dSig_1;
278  *(++ptr) = tag_info_features.trackSip3dSig_2;
279  *(++ptr) = tag_info_features.trackSip3dSig_3;
280  *(++ptr) = tag_info_features.z_ratio;
281  }
282  assert(start + n_features_global_ - 1 == ptr);
283 
284  // c_pf candidates
285  auto max_c_pf_n = std::min(features.c_pf_features.size(), (std::size_t)n_cpf_);
286  offset = i_jet * input_sizes_[kChargedCandidates];
287  for (std::size_t c_pf_n = 0; c_pf_n < max_c_pf_n; c_pf_n++) {
288  const auto& c_pf_features = features.c_pf_features.at(c_pf_n);
289  ptr = &data_[kChargedCandidates][offset + c_pf_n * n_features_cpf_];
290  start = ptr;
291  *ptr = c_pf_features.btagPf_trackEtaRel;
292  if (version_ == "V1") {
293  *(++ptr) = c_pf_features.btagPf_trackPtRatio;
294  *(++ptr) = c_pf_features.btagPf_trackPParRatio;
295  *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
296  *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
297  *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
298  *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
299  *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
300  } else {
301  *(++ptr) = c_pf_features.btagPf_trackJetDistVal;
302  *(++ptr) = c_pf_features.btagPf_trackPParRatio;
303  *(++ptr) = c_pf_features.btagPf_trackPtRatio;
304  *(++ptr) = c_pf_features.btagPf_trackSip2dSig;
305  *(++ptr) = c_pf_features.btagPf_trackSip2dVal;
306  *(++ptr) = c_pf_features.btagPf_trackSip3dSig;
307  *(++ptr) = c_pf_features.btagPf_trackSip3dVal;
308  *(++ptr) = c_pf_features.deltaR;
309  *(++ptr) = c_pf_features.drminsv;
310  *(++ptr) = c_pf_features.drsubjet1;
311  *(++ptr) = c_pf_features.drsubjet2;
312  *(++ptr) = c_pf_features.dxy;
313  *(++ptr) = c_pf_features.dxysig;
314  *(++ptr) = c_pf_features.dz;
315  *(++ptr) = c_pf_features.dzsig;
316  *(++ptr) = c_pf_features.erel;
317  *(++ptr) = c_pf_features.etarel;
318  *(++ptr) = c_pf_features.chi2;
319  *(++ptr) = c_pf_features.ptrel_noclip;
320  *(++ptr) = c_pf_features.quality;
321  }
322 
323  assert(start + n_features_cpf_ - 1 == ptr);
324  }
325 
326  if (version_ == "V2") {
327  // n_pf candidates
328  auto max_n_pf_n = std::min(features.n_pf_features.size(), (std::size_t)n_cpf_);
329  offset = i_jet * input_sizes_[kNeutralCandidates];
330  for (std::size_t n_pf_n = 0; n_pf_n < max_n_pf_n; n_pf_n++) {
331  const auto& n_pf_features = features.n_pf_features.at(n_pf_n);
332  ptr = &data_[kNeutralCandidates][offset + n_pf_n * n_features_npf_];
333  start = ptr;
334  *ptr = n_pf_features.deltaR_noclip;
335  *(++ptr) = n_pf_features.drminsv;
336  *(++ptr) = n_pf_features.drsubjet1;
337  *(++ptr) = n_pf_features.drsubjet2;
338  *(++ptr) = n_pf_features.erel;
339  *(++ptr) = n_pf_features.hadFrac;
340  *(++ptr) = n_pf_features.ptrel_noclip;
341  *(++ptr) = n_pf_features.puppiw;
342  assert(start + n_features_npf_ - 1 == ptr);
343  }
344  }
345 
346  // sv candidates
347  auto max_sv_n = std::min(features.sv_features.size(), (std::size_t)n_sv_);
348  offset = i_jet * input_sizes_[kVertices];
349  for (std::size_t sv_n = 0; sv_n < max_sv_n; sv_n++) {
350  const auto& sv_features = features.sv_features.at(sv_n);
351  ptr = &data_[kVertices][offset + sv_n * n_features_sv_];
352  start = ptr;
353  if (version_ == "V1") {
354  *ptr = sv_features.d3d;
355  *(++ptr) = sv_features.d3dsig;
356  } else {
357  *ptr = sv_features.costhetasvpv;
358  *(++ptr) = sv_features.deltaR;
359  *(++ptr) = sv_features.dxysig;
360  *(++ptr) = sv_features.mass;
361  *(++ptr) = sv_features.ntracks;
362  *(++ptr) = sv_features.pt;
363  *(++ptr) = sv_features.ptrel;
364  }
365  assert(start + n_features_sv_ - 1 == ptr);
366  }
367 }
368 
369 //define this as a plug-in
const Features & features() const
Definition: start.py:1
T getParameter(std::string const &) const
EventNumber_t event() const
Definition: EventID.h:41
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:125
taginfo
Definition: lumiTag.py:81
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:517
std::vector< reco::DeepDoubleXTagInfo > TagInfoCollection
DeepDoubleXONNXJetTagsProducer(const edm::ParameterSet &, const ONNXRuntime *)
std::vector< std::vector< float > > FloatArrays
Definition: ONNXRuntime.h:23
const edm::EDGetTokenT< TagInfoCollection > src_
static void globalEndJob(const ONNXRuntime *)
int iEvent
Definition: GenABIO.cc:224
void produce(edm::Event &, const edm::EventSetup &) override
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
T min(T a, T b)
Definition: MathUtil.h:58
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
void make_inputs(unsigned i_jet, const reco::DeepDoubleXTagInfo &taginfo)
edm::EventID id() const
Definition: EventBase.h:59
RefToBaseProd< T > makeRefToBaseProdFrom(RefToBase< T > const &iRef, Event const &iEvent)
HLT enums.
def cache(function)
Definition: utilities.py:3
static void fillDescriptions(edm::ConfigurationDescriptions &)
def move(src, dest)
Definition: eostools.py:511
static std::unique_ptr< ONNXRuntime > initializeGlobalCache(const edm::ParameterSet &)