CMS 3D CMS Logo

DeepFlavourJetTagsProducer.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: ​RecoBTag/​SecondaryVertex
4 // Class: DeepFlavourJetTagsProducer
5 //
14 //
15 // Original Author: Mauro Verzetti (U. Rochester)
16 //
17 //
18 
19 
20 // system include files
21 #include <memory>
22 
23 // user include files
26 
29 
32 
38 
39 //from lwtnn
40 #include "lwtnn/LightweightNeuralNetwork.hh"
41 #include "lwtnn/parse_json.hh"
42 
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
49 
50 #include <boost/algorithm/string.hpp>
51 using namespace std;
52 using namespace reco;
53 //
54 // class declaration
55 //
56 
57 namespace {
58 
59 struct MVAVar {
62  int index;
63  double default_value;
64 };
65 
66 class NeuralNetworkAndConstants {
67 public:
68 
69  NeuralNetworkAndConstants(const edm::ParameterSet&);
70 
71  std::unique_ptr<const lwt::LightweightNeuralNetwork> const& neural_network() const { return neural_network_; }
72  vector<string> const& outputs() const { return outputs_; }
73  bool check_sv_for_defaults() const { return check_sv_for_defaults_; }
74  map<string, string> const& toadd() const { return toadd_; }
75  vector<MVAVar> const& variables() const { return variables_; }
76 
77 private:
78 
79  std::unique_ptr<const lwt::LightweightNeuralNetwork> neural_network_;
80  vector<string> outputs_;
81  bool check_sv_for_defaults_;
82  map<string, string> toadd_;
83  vector<MVAVar> variables_;
84 };
85 
86 class DeepFlavourJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCache<NeuralNetworkAndConstants>> {
87 public:
88  explicit DeepFlavourJetTagsProducer(const edm::ParameterSet&, NeuralNetworkAndConstants const*);
89  ~DeepFlavourJetTagsProducer() override;
90 
91  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
92 
93  static std::unique_ptr<NeuralNetworkAndConstants> initializeGlobalCache(const edm::ParameterSet& iConfig) {
94  return std::make_unique<NeuralNetworkAndConstants>(iConfig);
95  }
96 
97  static void globalEndJob(NeuralNetworkAndConstants*) { }
98 
99 private:
100  typedef std::vector<reco::ShallowTagInfo> INFOS;
101  void beginStream(edm::StreamID) override {}
102  void produce(edm::Event&, const edm::EventSetup&) override;
103  void endStream() override {}
104 
105  // ----------member data ---------------------------
106  const edm::EDGetTokenT< INFOS > src_;
107  lwt::ValueMap inputs_; //typedef of unordered_map<string, float>
108 };
109 
110 //
111 // constants, enums and typedefs
112 //
113 
114 
115 //
116 // static data member definitions
117 //
118 
119 //
120 // constructors and destructor
121 //
122 
123 NeuralNetworkAndConstants::NeuralNetworkAndConstants(const edm::ParameterSet& iConfig) :
124  check_sv_for_defaults_(iConfig.getParameter<bool>("checkSVForDefaults"))
125 {
126  bool mean_padding = iConfig.getParameter<bool>("meanPadding");
127 
128  //parse json
129  edm::FileInPath nnconfig = iConfig.getParameter<edm::FileInPath>("NNConfig");
130  ifstream jsonfile(nnconfig.fullPath());
131  auto config = lwt::parse_json(jsonfile);
132 
133  //create NN and store the output names for the future
134  neural_network_ = std::make_unique<const lwt::LightweightNeuralNetwork>(config.inputs, config.layers, config.outputs);
135 
136  outputs_ = config.outputs;
137  set<string> outset(outputs_.begin(), outputs_.end());
138 
139  //in case we want to merge some different outputs together
140  edm::ParameterSet toaddPSet = iConfig.getParameter<edm::ParameterSet>("toAdd");
141  for(auto const& output : toaddPSet.getParameterNamesForType<string>()) {
142  string target = toaddPSet.getParameter<string>(output);
143  if(outset.find(output) == outset.end())
144  throw cms::Exception("RuntimeError") << "The required output: " << output << " to be added to " << target << " could not be found among the NN outputs" << endl;
145  if(outset.find(target) == outset.end())
146  throw cms::Exception("RuntimeError") << "The required output: " << target << ", target of addition of " << output << " could not be found among the NN outputs" << endl;
147  toadd_[output] = target;
148  }
149 
150  //get the set-up for the inputs
151  for(auto const& input : config.inputs) {
152  MVAVar var;
153  var.name = input.name;
154  //two paradigms
155  vector<string> tokens;
156  if (var.name != "Jet_JP" && var.name != "Jet_JBP" && var.name != "Jet_SoftMu" && var.name != "Jet_SoftEl"){boost::split(tokens,var.name,boost::is_any_of("_"));}
157  else {tokens.push_back(var.name);}
158  if(tokens.empty()) {
159  throw cms::Exception("RuntimeError") << "I could not parse properly " << input.name << " as input feature" << std::endl;
160  }
161  var.id = reco::getTaggingVariableName(tokens.at(0));
162  //die grafully if the tagging variable is not found!
163  if(var.id == reco::btau::lastTaggingVariable) {
164  throw cms::Exception("ValueError") << "I could not find the TaggingVariable named " << tokens.at(0)
165  << " from the NN input variable: " << input.name
166  << ". Please check the spelling" << std::endl;
167  }
168  var.index = (tokens.size() == 2) ? stoi(tokens.at(1)) : -1;
169  var.default_value = (mean_padding) ? 0. : -1*input.offset; //set default to -offset so that when scaling (val+offset)*scale the outcome is 0
170  //for mean padding it is set to zero so that undefined values are assigned -mean/scale
171 
172  variables_.push_back(var);
173  }
174 }
175 
176 DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet& iConfig, NeuralNetworkAndConstants const* gc) :
177  src_( consumes< INFOS >(iConfig.getParameter<edm::InputTag>("src")) ),
178  inputs_()
179 {
180  //produce one output kind per node
181  for(auto const& outnode : gc->outputs()) {
182  if(gc->toadd().find(outnode) == gc->toadd().end()){ //produce output only if does not get added
183  produces<JetTagCollection>(outnode);
184  }
185  }
186 }
187 
188 DeepFlavourJetTagsProducer::~DeepFlavourJetTagsProducer()
189 {
190 }
191 
192 
193 //
194 // member functions
195 //
196 
197 // ------------ method called to produce the data ------------
198 void
199 DeepFlavourJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup)
200 {
201  NeuralNetworkAndConstants const* gc = globalCache();
202  vector<string> const& outputs = gc->outputs();
203  map<string, string> const& toadd = gc->toadd();
204 
205  // get input TagInfos
206  edm::Handle<INFOS> taginfos;
207  iEvent.getByToken(src_, taginfos);
208 
209  // create the output collection
210  // which is a "map" RefToBase<Jet> --> float
211  vector< std::unique_ptr<JetTagCollection> > output_tags;
212  output_tags.reserve(outputs.size());
213  for(size_t i = 0; i < outputs.size(); ++i) {
214  if(!taginfos->empty()) {
215  edm::RefToBase<Jet> jj = taginfos->begin()->jet();
216  output_tags.push_back(
217  std::make_unique<JetTagCollection>(
218  edm::makeRefToBaseProdFrom(jj, iEvent)
219  )
220  );
221  } else {
222  output_tags.push_back(
223  std::make_unique<JetTagCollection>()
224  );
225  }
226  }
227 
228  int naninput = 0;
229  int nanoutput = 0;
230 
231  // loop over TagInfos
232  for(auto& info : *(taginfos)) {
233  //convert the taginfo into the value map in the appropriate way
234  TaggingVariableList vars = info.taggingVariables();
235  //if there are no tracks there's no point in doing it
236  bool notracks = (vars.get(reco::btau::jetNSelectedTracks) == 0);
237  bool novtx = (vars.get(reco::btau::jetNSecondaryVertices) == 0);
238  bool defaulted = (gc->check_sv_for_defaults()) ? (notracks && novtx) : notracks;
239  lwt::ValueMap nnout; //returned value
240 
241  if(!defaulted) {
242  for(auto const& var : gc->variables()) {
243  if(var.index >= 0){
244  std::vector<float> vals = vars.getList(var.id, false);
245  inputs_[var.name] = (((int) vals.size()) > var.index) ? vals.at(var.index) : var.default_value;
246  }
247  //single value tagging var
248  else {
249  inputs_[var.name] = vars.get(var.id, var.default_value);
250  }
251 
252  //count if the input is nan
253  if(std::isnan(inputs_[var.name])) {
254  naninput++;
255  }
256  }
257 
258  //compute NN output(s)
259  nnout = gc->neural_network()->compute(inputs_);
260 
261  //merge outputs
262  for(auto const& entry : toadd) {
263  nnout[entry.second] += nnout[entry.first];
264  }
265 
266  //count if the output is nan
267  for(const auto& entry : nnout) {
268  if(std::isnan(entry.second)) {
269  nanoutput++;
270  }
271  }
272  }
273 
274  //ket the maps key
275  edm::RefToBase<Jet> key = info.jet();
276 
277  //dump the NN output(s)
278  for(size_t i = 0; i < outputs.size(); ++i) {
279  (*output_tags[i])[key] = (defaulted) ? -1 : nnout[outputs[i]];
280  }
281  }
282 
283  if( naninput + nanoutput > 0 ) {
284  edm::LogWarning("ValueError") << "The NN encountered " << naninput << " nan input TagInfo values and produced " << nanoutput << " nan output values";
285  }
286 
287  // put the output in the event
288  for(size_t i = 0; i < outputs.size(); ++i) {
289  if(toadd.find(outputs[i]) == toadd.end()) {
290  iEvent.put(std::move(output_tags[i]), outputs[i]);
291  }
292  }
293 }
294 
295 // ------------ method fills 'descriptions' with the allowed parameters for the module ------------
296 void
298  //The following says we do not know what parameters are allowed so do no validation
299  // Please change this to state exactly what you do use, even if it is no parameters
301  desc.setUnknown();
302  descriptions.addDefault(desc);
303 }
304 } // end unnamed namespace
305 
306 //define this as a plug-in
T getParameter(std::string const &) const
static const TGPicture * info(bool iBackgroundIsBlack)
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:125
bool getByToken(EDGetToken token, Handle< PROD > &result) const
Definition: Event.h:517
Definition: config.py:1
static std::string const input
Definition: EdmProvDump.cc:48
int iEvent
Definition: GenABIO.cc:224
#define DEFINE_FWK_MODULE(type)
Definition: MakerMacros.h:16
void addDefault(ParameterSetDescription const &psetDescription)
bool isnan(float x)
Definition: math.h:13
std::vector< TaggingValue > getList(TaggingVariableName tag, bool throwOnEmptyList=true) const
TaggingValue get(TaggingVariableName tag) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
TaggingVariableName getTaggingVariableName(const std::string &name)
RefToBaseProd< T > makeRefToBaseProdFrom(RefToBase< T > const &iRef, Event const &iEvent)
fixed size matrix
HLT enums.
vars
Definition: DeepTauId.cc:77
double split
Definition: MVATrainer.cc:139
def move(src, dest)
Definition: eostools.py:511