CMS 3D CMS Logo

BoostedJetONNXJetTagsProducer.cc
Go to the documentation of this file.
3 
6 
8 
11 
13 
15 
17 
19 
20 #include <iostream>
21 #include <fstream>
22 #include <algorithm>
23 #include <numeric>
24 #include <nlohmann/json.hpp>
25 
26 using namespace cms::Ort;
27 using namespace btagbtvdeep;
28 
// Stream producer that runs an ONNX model on DeepBoostedJet tag infos and registers one
// JetTagCollection product per entry of `flav_names_`. The ONNXRuntime object is provided
// through edm::GlobalCache (see the EDProducer template argument).
// NOTE(review): some declarations used by the definitions in this file (the constructor,
// fillDescriptions, src_, data_) are not visible in this listing -- the corresponding lines
// appear to have been dropped when the listing was extracted.
29 class BoostedJetONNXJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
30 public:
33 
35 
    // Creates the ONNXRuntime object from the `model_path` configuration parameter.
36  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet &);
37  static void globalEndJob(const ONNXRuntime *);
38 
39 private:
40  typedef std::vector<reco::DeepBoostedJetTagInfo> TagInfoCollection;
42 
43  void produce(edm::Event &, const edm::EventSetup &) override;
44 
    // Shifts, scales and clips every element of `input`, then pads (or truncates) the result
    // so that its length lies within [min_length, max_length].
    // NOTE(review): the defaults min = 0 / max = -1 cannot satisfy the
    // `min <= pad_value && pad_value <= max` assertion in the definition, so callers are
    // expected to pass both bounds explicitly.
45  std::vector<float> center_norm_pad(const std::vector<float> &input,
46  float center,
47  float scale,
48  unsigned min_length,
49  unsigned max_length,
50  float pad_value = 0,
51  float replace_inf_value = 0,
52  float min = 0,
53  float max = -1);
    // Fills the per-group input buffers from the features of one tag info.
54  void make_inputs(const reco::DeepBoostedJetTagInfo &taginfo);
55 
57  std::vector<std::string> flav_names_; // names of the output scores
58  std::vector<std::string> input_names_; // names of each input group - the ordering is important!
59  std::vector<std::vector<int64_t>> input_shapes_; // shapes of each input group (-1 for dynamic axis)
60  std::vector<unsigned> input_sizes_; // total length of each input vector
61  std::unordered_map<std::string, PreprocessParams> prep_info_map_; // preprocessing info for each input group
62 
64 
    // when true, the configuration, the transformed inputs and the outputs are printed to std::cout
65  bool debug_ = false;
66 };
67 
// Constructor body (the signature line is not visible in this listing).
// Reads the input tag, the output score names and the debug flag from the configuration,
// loads the per-group preprocessing parameters (from a JSON file when `preprocess_json` is
// non-empty, otherwise from the `preprocessParams` PSet), allocates one zero-filled input
// buffer per group and registers one JetTagCollection product per flavour name.
69  : src_(consumes<TagInfoCollection>(iConfig.getParameter<edm::InputTag>("src"))),
70  flav_names_(iConfig.getParameter<std::vector<std::string>>("flav_names")),
71  debug_(iConfig.getUntrackedParameter<bool>("debugMode", false)) {
72  // load preprocessing info
73  auto json_path = iConfig.getParameter<std::string>("preprocess_json");
74  if (!json_path.empty()) {
75  // use preprocessing json file if available
    // NOTE(review): the stream is not checked for having been opened successfully, and the
    // line defining `js` is not visible in this listing (presumably it parses `ifs`) -- confirm.
76  std::ifstream ifs(edm::FileInPath(json_path).fullPath());
78  js.at("input_names").get_to(input_names_);
79  for (const auto &group_name : input_names_) {
80  const auto &group_pset = js.at(group_name);
81  auto &prep_params = prep_info_map_[group_name];
82  group_pset.at("var_names").get_to(prep_params.var_names);
83  if (group_pset.contains("var_length")) {
    // fixed-length group: minimum and maximum length coincide
84  prep_params.min_length = group_pset.at("var_length");
85  prep_params.max_length = prep_params.min_length;
86  } else {
    // variable-length group: record a shape whose last axis is dynamic (-1)
    // NOTE(review): input_shapes_ only grows in this branch. If fixed-length and
    // variable-length groups were mixed in one file, its indices would no longer line up
    // with input_names_ (see the `input_shapes_.at(i)` access in the debug printout below)
    // -- confirm that such a mix cannot occur.
87  prep_params.min_length = group_pset.at("min_length");
88  prep_params.max_length = group_pset.at("max_length");
89  input_shapes_.push_back({1, (int64_t)prep_params.var_names.size(), -1});
90  }
91  const auto &var_info_pset = group_pset.at("var_infos");
92  for (const auto &var_name : prep_params.var_names) {
93  const auto &var_pset = var_info_pset.at(var_name);
94  double median = var_pset.at("median");
95  double norm_factor = var_pset.at("norm_factor");
96  double replace_inf_value = var_pset.at("replace_inf_value");
97  double lower_bound = var_pset.at("lower_bound");
98  double upper_bound = var_pset.at("upper_bound");
    // `pad` is optional in the JSON file and falls back to 0
99  double pad = var_pset.contains("pad") ? double(var_pset.at("pad")) : 0;
100  prep_params.var_info_map[var_name] =
101  PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, pad);
102  }
103 
104  // create data storage with a fixed size vector initialized w/ 0
    // the buffer is sized for the longest case: max_length values for every variable of the group
105  const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
106  data_.emplace_back(len, 0);
107  }
108  } else {
109  // otherwise use the PSet in the python config file
    // this branch only reads `var_length` (fixed-length groups), always uses a pad value
    // of 0 and leaves input_shapes_ untouched
110  const auto &prep_pset = iConfig.getParameterSet("preprocessParams");
111  input_names_ = prep_pset.getParameter<std::vector<std::string>>("input_names");
112  for (const auto &group_name : input_names_) {
113  const auto &group_pset = prep_pset.getParameterSet(group_name);
114  auto &prep_params = prep_info_map_[group_name];
115  prep_params.var_names = group_pset.getParameter<std::vector<std::string>>("var_names");
116  prep_params.min_length = group_pset.getParameter<unsigned>("var_length");
117  prep_params.max_length = prep_params.min_length;
118  const auto &var_info_pset = group_pset.getParameterSet("var_infos");
119  for (const auto &var_name : prep_params.var_names) {
120  const auto &var_pset = var_info_pset.getParameterSet(var_name);
121  double median = var_pset.getParameter<double>("median");
122  double norm_factor = var_pset.getParameter<double>("norm_factor");
123  double replace_inf_value = var_pset.getParameter<double>("replace_inf_value");
124  double lower_bound = var_pset.getParameter<double>("lower_bound");
125  double upper_bound = var_pset.getParameter<double>("upper_bound");
126  prep_params.var_info_map[var_name] =
127  PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, 0);
128  }
129 
130  // create data storage with a fixed size vector initialized w/ 0
131  const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
132  data_.emplace_back(len, 0);
133  }
134  }
135 
    // debug printout of the configuration: per group the shape (only when input_shapes_ is
    // non-empty) and the variable names, followed by the list of output names
136  if (debug_) {
137  for (unsigned i = 0; i < input_names_.size(); ++i) {
138  const auto &group_name = input_names_.at(i);
139  if (!input_shapes_.empty()) {
140  std::cout << group_name << "\nshapes: ";
141  for (const auto &x : input_shapes_.at(i)) {
142  std::cout << x << ", ";
143  }
144  }
145  std::cout << "\nvariables: ";
146  for (const auto &x : prep_info_map_.at(group_name).var_names) {
147  std::cout << x << ", ";
148  }
149  std::cout << "\n";
150  }
151  std::cout << "flav_names: ";
152  for (const auto &flav_name : flav_names_) {
153  std::cout << flav_name << ", ";
154  }
155  std::cout << "\n";
156  }
157 
158  // get output names from flav_names
    // one JetTagCollection product is registered per name, using the name as instance label
159  for (const auto &flav_name : flav_names_) {
160  produces<JetTagCollection>(flav_name);
161  }
162 }
163 
165 
// Body of fillDescriptions (the signature line and the declarations of `desc` and
// `preprocessParams` are not visible in this listing). Declares the configuration
// parameters of the module together with their default values.
167  // pfDeepBoostedJetTags
169  desc.add<edm::InputTag>("src", edm::InputTag("pfDeepBoostedJetTagInfos"));
    // an empty string is the default; the constructor then reads `preprocessParams` instead
170  desc.add<std::string>("preprocess_json", "");
171  // `preprocessParams` is deprecated -- use the preprocessing json file instead
173  preprocessParams.setAllowAnything();
174  preprocessParams.setComment("`preprocessParams` is deprecated, please use `preprocess_json` instead.");
175  desc.addOptional<edm::ParameterSetDescription>("preprocessParams", preprocessParams);
176  desc.add<edm::FileInPath>("model_path",
177  edm::FileInPath("RecoBTag/Combined/data/DeepBoostedJet/V02/full/resnet.onnx"));
    // default names of the output scores; the constructor registers one product per entry
178  desc.add<std::vector<std::string>>("flav_names",
179  std::vector<std::string>{
180  "probTbcq",
181  "probTbqq",
182  "probTbc",
183  "probTbq",
184  "probWcq",
185  "probWqq",
186  "probZbb",
187  "probZcc",
188  "probZqq",
189  "probHbb",
190  "probHcc",
191  "probHqqqq",
192  "probQCDbb",
193  "probQCDcc",
194  "probQCDb",
195  "probQCDc",
196  "probQCDothers",
197  });
198  desc.addOptionalUntracked<bool>("debugMode", false);
199 
200  descriptions.addWithDefaultLabel(desc);
201 }
202 
203 std::unique_ptr<ONNXRuntime> BoostedJetONNXJetTagsProducer::initializeGlobalCache(const edm::ParameterSet &iConfig) {
204  return std::make_unique<ONNXRuntime>(iConfig.getParameter<edm::FileInPath>("model_path").fullPath());
205 }
206 
208 
// Body of produce (the signature line and the declaration of `tag_infos` are not visible in
// this listing). For every tag info the model is evaluated and the resulting scores are
// stored in one JetTagCollection per flavour name, which is then put into the event.
// NOTE(review): no validity check of the handle is visible before it is dereferenced -- confirm.
211  iEvent.getByToken(src_, tag_infos);
212 
213  // initialize output collection
    // with at least one tag info the collections are tied to the jet collection of the
    // first jet; otherwise they are default-constructed (and stay empty)
214  std::vector<std::unique_ptr<JetTagCollection>> output_tags;
215  if (!tag_infos->empty()) {
216  auto jet_ref = tag_infos->begin()->jet();
217  auto ref2prod = edm::makeRefToBaseProdFrom(jet_ref, iEvent);
218  for (std::size_t i = 0; i < flav_names_.size(); i++) {
219  output_tags.emplace_back(std::make_unique<JetTagCollection>(ref2prod));
220  }
221  } else {
222  for (std::size_t i = 0; i < flav_names_.size(); i++) {
223  output_tags.emplace_back(std::make_unique<JetTagCollection>());
224  }
225  }
226 
227  for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
228  const auto &taginfo = (*tag_infos)[jet_n];
229  std::vector<float> outputs(flav_names_.size(), 0); // init as all zeros
230 
    // jets whose tag info has no features keep the all-zero scores
231  if (!taginfo.features().empty()) {
232  // convert inputs
    // NOTE(review): the statement belonging to the comment above is not visible in this
    // listing; presumably it calls make_inputs(taginfo) to fill data_ -- confirm.
234  // run prediction and get outputs
    // only the first array returned by run() is used; it must hold one score per flavour name
235  outputs = globalCache()->run(input_names_, data_, input_shapes_)[0];
236  assert(outputs.size() == flav_names_.size());
237  }
238 
239  const auto &jet_ref = tag_infos->at(jet_n).jet();
240  for (std::size_t flav_n = 0; flav_n < flav_names_.size(); flav_n++) {
241  (*(output_tags[flav_n]))[jet_ref] = outputs[flav_n];
242  }
243  }
244 
    // debug printout: event id, then kinematics and all scores of every jet
245  if (debug_) {
246  std::cout << "=== " << iEvent.id().run() << ":" << iEvent.id().luminosityBlock() << ":" << iEvent.id().event()
247  << " ===" << std::endl;
248  for (unsigned jet_n = 0; jet_n < tag_infos->size(); ++jet_n) {
249  const auto &jet_ref = tag_infos->at(jet_n).jet();
250  std::cout << " - Jet #" << jet_n << ", pt=" << jet_ref->pt() << ", eta=" << jet_ref->eta()
251  << ", phi=" << jet_ref->phi() << std::endl;
252  for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
253  std::cout << " " << flav_names_.at(flav_n) << " = " << (*(output_tags.at(flav_n)))[jet_ref] << std::endl;
254  }
255  }
256  }
257 
258  // put into the event
    // each collection is stored under its flavour name as product instance label
259  for (std::size_t flav_n = 0; flav_n < flav_names_.size(); ++flav_n) {
260  iEvent.put(std::move(output_tags[flav_n]), flav_names_[flav_n]);
261  }
262 }
263 
264 std::vector<float> BoostedJetONNXJetTagsProducer::center_norm_pad(const std::vector<float> &input,
265  float center,
266  float norm_factor,
267  unsigned min_length,
268  unsigned max_length,
269  float pad_value,
270  float replace_inf_value,
271  float min,
272  float max) {
273  // do variable shifting/scaling/padding/clipping in one go
274 
275  assert(min <= pad_value && pad_value <= max);
276  assert(min_length <= max_length);
277 
278  unsigned target_length = std::clamp((unsigned)input.size(), min_length, max_length);
279  std::vector<float> out(target_length, pad_value);
280  for (unsigned i = 0; i < input.size() && i < target_length; ++i) {
281  out[i] = std::clamp((catch_infs(input[i], replace_inf_value) - center) * norm_factor, min, max);
282  }
283  return out;
284 }
285 
// Body of make_inputs (the signature line is not visible in this listing). For every input
// group, the features of `taginfo` are transformed with center_norm_pad and written one
// variable after another into the group's buffer in data_.
287  for (unsigned igroup = 0; igroup < input_names_.size(); ++igroup) {
288  const auto &group_name = input_names_[igroup];
289  const auto &prep_params = prep_info_map_.at(group_name);
290  auto &group_values = data_[igroup];
    // restore the full buffer size; the previous call may have shrunk it (see the final resize below)
291  group_values.resize(input_sizes_[igroup]);
292  // first reset group_values to 0
293  std::fill(group_values.begin(), group_values.end(), 0);
294  unsigned curr_pos = 0;
295  // transform/pad
296  for (unsigned i = 0; i < prep_params.var_names.size(); ++i) {
297  const auto &varname = prep_params.var_names[i];
298  const auto &raw_value = taginfo.features().get(varname);
299  const auto &info = prep_params.info(varname);
300  auto val = center_norm_pad(raw_value,
301  info.center,
302  info.norm_factor,
303  prep_params.min_length,
304  prep_params.max_length,
305  info.pad,
306  info.replace_inf_value,
307  info.lower_bound,
308  info.upper_bound);
    // append the transformed values directly after those of the previous variable
309  std::copy(val.begin(), val.end(), group_values.begin() + curr_pos);
310  curr_pos += val.size();
    // the length of the first variable replaces the last shape axis of this group
    // NOTE(review): indexing input_shapes_ by igroup assumes that it holds one entry per
    // input group whenever it is non-empty -- confirm against how the constructor fills it.
311  if (i == 0 && (!input_shapes_.empty())) {
312  input_shapes_[igroup][2] = val.size();
313  }
314 
315  if (debug_) {
316  std::cout << " -- var=" << varname << ", center=" << info.center << ", scale=" << info.norm_factor
317  << ", replace=" << info.replace_inf_value << ", pad=" << info.pad << std::endl;
318  for (const auto &v : val) {
319  std::cout << v << ",";
320  }
321  std::cout << std::endl;
322  }
323  }
    // shrink the buffer to the number of values actually written for this jet
324  group_values.resize(curr_pos);
325  }
326 }
327 
328 //define this as a plug-in
RunNumber_t run() const
Definition: EventID.h:39
const Features & features() const
const edm::EDGetTokenT< TagInfoCollection > src_
T getParameter(std::string const &) const
EventNumber_t event() const
Definition: EventID.h:41
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
static const TGPicture * info(bool iBackgroundIsBlack)
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:125
ParameterDescriptionBase * addOptional(U const &iLabel, T const &value)
std::unordered_map< std::string, PreprocessParams > prep_info_map_
def copy(args, dbName)
const float catch_infs(const float in, const float replace_value=0.)
Definition: deep_helpers.cc:21
taginfo
Definition: lumiTag.py:81
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:517
void make_inputs(const reco::DeepBoostedJetTagInfo &taginfo)
void produce(edm::Event &, const edm::EventSetup &) override
std::vector< std::vector< float > > FloatArrays
Definition: ONNXRuntime.h:23
static void globalEndJob(const ONNXRuntime *)
LuminosityBlockNumber_t luminosityBlock() const
Definition: EventID.h:40
std::vector< reco::DeepBoostedJetTagInfo > TagInfoCollection
static void fillDescriptions(edm::ConfigurationDescriptions &)
static std::string const input
Definition: EdmProvDump.cc:48
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
static std::unique_ptr< ONNXRuntime > initializeGlobalCache(const edm::ParameterSet &)
T min(T a, T b)
Definition: MathUtil.h:58
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
def parse(path, config)
Definition: dumpparser.py:13
ParameterSet const & getParameterSet(std::string const &) const
BoostedJetONNXJetTagsProducer(const edm::ParameterSet &, const ONNXRuntime *)
edm::EventID id() const
Definition: EventBase.h:59
RefToBaseProd< T > makeRefToBaseProdFrom(RefToBase< T > const &iRef, Event const &iEvent)
HLT enums.
def cache(function)
Definition: utilities.py:3
std::vector< std::vector< int64_t > > input_shapes_
ParameterDescriptionBase * addOptionalUntracked(U const &iLabel, T const &value)
std::vector< float > center_norm_pad(const std::vector< float > &input, float center, float scale, unsigned min_length, unsigned max_length, float pad_value=0, float replace_inf_value=0, float min=0, float max=-1)
def move(src, dest)
Definition: eostools.py:511