CMS 3D CMS Logo

StringBasedNTupler.h
Go to the documentation of this file.
1 #ifndef StringBasedNTupler_NTupler_H
2 #define StringBasedNTupler_NTupler_H
3 
4 //#include "PhysicsTools/UtilAlgos/interface/UpdaterService.h"
5 
11 
12 //#include "PhysicsTools/UtilAlgos/interface/TFileService.h"
14 #include "TTree.h"
15 #include "TBranch.h"
16 #include "TFile.h"
17 
19 
26 
28 
31 
32 //#define StringBasedNTuplerPrecision float;
33 
34 #include <memory>
35 #include <string>
36 #include <sstream>
37 
43 
44 // LHE Event
46 
// Describes one ntuple branch: an expression string (expr_) associated with a class name
// (class_) and an input tag (src_), plus optional ordering and selection strings.
// Also carries the raw pointer slot (dataHolderPtr_) whose address is exposed through
// dataHolderPtrAdress().
// NOTE(review): several original lines of this class are absent from this listing
// (the second constructor's signature, the declaration of `name` in branchName(),
// and most private members), so the comments below only cover what is visible.
47 class TreeBranch {
48 public:
 // Default constructor: all listed string members start empty.
49  TreeBranch() : class_(""), expr_(""), order_(""), selection_(""), maxIndexName_(""), branchAlias_("") {}
 // Second constructor (signature not shown in this listing): stores the arguments,
 // builds the human-readable branchTitle_ and logs the alias -> title mapping.
52  : class_(C), src_(S), expr_(E), order_(O), selection_(SE), maxIndexName_(Mi), branchAlias_(Ba) {
53  branchTitle_ = E + " calculated on " + C + " object from " + S.encode();
 // the ordering and selection clauses are appended to the title only when non-empty
54  if (!O.empty())
55  branchTitle_ += " ordered according to " + O;
56  if (!SE.empty())
57  branchTitle_ += " selecting on " + SE;
58  edm::LogInfo("TreeBranch") << "the branch with alias: " << branchAlias_ << " corresponds to: " << branchTitle_;
59  }
60 
 // Read-only accessors for the stored configuration strings and input tag.
61  const std::string& className() const { return class_; }
62  const edm::InputTag& src() const { return src_; }
63  const std::string& expr() const { return expr_; }
64  const std::string& order() const { return order_; }
65  const std::string& selection() const { return selection_; }
66  const std::string& maxIndexName() const { return maxIndexName_; }
 // Returns a copy of `name` with every '_' replaced by '0'.
 // NOTE(review): `name` is declared on a line missing from this listing; presumably it is
 // a copy of branchAlias_ - confirm against the original header.
67  const std::string branchName() const {
69  std::replace(name.begin(), name.end(), '_', '0');
70  return std::string(name);
71  }
72  const std::string& branchAlias() const { return branchAlias_; }
73  const std::string& branchTitle() const { return branchTitle_; }
 // Owning pointer to the vector of floats produced for one event.
74  typedef std::unique_ptr<std::vector<float> > value;
 // Declared here, defined elsewhere: returns the values for this branch for the given event.
75  value branch(const edm::Event& iEvent);
76 
 // Access to the raw data pointer slot: its address, its current value, and a setter.
 // None of these take or release ownership by themselves.
77  std::vector<float>** dataHolderPtrAdress() { return &dataHolderPtr_; }
78  std::vector<float>* dataHolderPtr() { return dataHolderPtr_; }
79  void assignDataHolderPtr(std::vector<float>* data) { dataHolderPtr_ = data; }
80 
81 private:
90 
 // NOTE(review): neither visible constructor initializer list sets this pointer;
 // confirm it is initialized before anything reads or deletes it.
91  std::vector<float>* dataHolderPtr_;
92 };
93 
94 template <typename Object>
96 public:
98  value operator()() { return std::move(value_); }
99 
101  const float defaultValue = 0.;
102  // grab the object
104  iEvent.getByLabel(B.src(), oH);
105  //empty vector if product not found
106  if (oH.failedToGet()) {
107  edm::LogError("StringBranchHelper") << "cannot open: " << B.src();
108  value_.reset(new std::vector<float>(0));
109  } else {
110  //parser for the object expression
112  //allocate enough memory for the data holder
113  value_.reset(new std::vector<float>(1));
114  try {
115  (*value_)[0] = (expr)(*oH);
116  } catch (...) {
117  LogDebug("StringLeaveHelper") << "could not evaluate expression: " << B.expr()
118  << " on class: " << B.className();
119  (*value_)[0] = defaultValue;
120  }
121  }
122  }
123 
124 private:
125  value value_;
126 };
127 
128 template <typename Object, typename Collection = std::vector<Object> >
130 public:
132  value operator()() { return std::move(value_); }
133 
135  const float defaultValue = 0.;
136 
137  // grab the collection
139  iEvent.getByLabel(B.src(), oH);
140 
141  //empty vector if product not found
142  if (oH.failedToGet()) {
143  if (!(iEvent.isRealData() && B.className() == "reco::GenParticle")) { //don't output genparticle error in data
144  edm::LogError("StringBranchHelper") << "cannot open: " << B.src() << " " << B.className();
145  }
146  value_.reset(new std::vector<float>());
147  } else {
148  //parser for the object expression
150  //allocate enough memory for the data holder
151  value_.reset(new std::vector<float>());
152  value_->reserve(oH->size());
153 
155  if (!B.selection().empty()) {
156  //std::cout<<"trying to get to a selection"<<std::endl;
157  selection = new StringCutObjectSelector<Object>(B.selection());
158  //std::cout<<"got the objet"<<std::endl;
159  }
160  uint i_end = oH->size();
161  //sort things first if requested
162  if (!B.order().empty()) {
164  // allocate a vector of pointers (we are using view) to be sorted
165  std::vector<const Object*> copyToSort(oH->size());
166  for (uint i = 0; i != i_end; ++i)
167  copyToSort[i] = &(*oH)[i];
168  std::sort(copyToSort.begin(), copyToSort.end(), sortByStringFunction<Object>(&order));
169  //then loop and fill
170  for (uint i = 0; i != i_end; ++i) {
171  //try and catch is necessary because ...
172  try {
173  if (selection && !((*selection)(*(copyToSort)[i])))
174  continue;
175  value_->push_back((expr)(*(copyToSort)[i]));
176  } catch (...) {
177  LogDebug("StringBranchHelper")
178  << "with sorting. could not evaluate expression: " << B.expr() << " on class: " << B.className();
179  value_->push_back(defaultValue); //push a default value to not change the indexing
180  }
181  }
182  } else {
183  //actually fill the vector of values
184  for (uint i = 0; i != i_end; ++i) {
185  //try and catch is necessary because ...
186  try {
187  if (selection && !((*selection)((*oH)[i])))
188  continue;
189  value_->push_back((expr)((*oH)[i]));
190  } catch (...) {
191  LogDebug("StringBranchHelper")
192  << "could not evaluate expression: " << B.expr() << " on class: " << B.className();
193  value_->push_back(defaultValue); //push a default value to not change the indexing
194  }
195  }
196  }
197  if (selection)
198  delete selection;
199  }
200  }
201 
202 private:
203  value value_;
204 };
205 
206 class StringBasedNTupler : public NTupler {
207 public:
209  edm::ParameterSet branchesPSet = iConfig.getParameter<edm::ParameterSet>("branchesPSet");
210  std::vector<std::string> branches;
211  branchesPSet.getParameterSetNames(branches);
212  const std::string separator = branchesPSet.getUntrackedParameter<std::string>("separator", ":");
213  for (uint b = 0; b != branches.size(); ++b) {
214  edm::ParameterSet bPSet = branchesPSet.getParameter<edm::ParameterSet>(branches[b]);
215  std::string className = "";
216  if (bPSet.exists("class"))
217  className = bPSet.getParameter<std::string>("class");
218  else
219  className = bPSet.getParameter<std::string>("Class");
220  edm::InputTag src = edm::Service<InputTagDistributorService>()->retrieve("src", bPSet);
221  edm::ParameterSet leavesPSet = bPSet.getParameter<edm::ParameterSet>("leaves");
222  std::string order = "";
223  if (bPSet.exists("order"))
224  order = bPSet.getParameter<std::string>("order");
225  std::string selection = "";
226  if (bPSet.exists("selection"))
227  selection = bPSet.getParameter<std::string>("selection");
228  // do it one by one with configuration [string x = "x"]
229  std::vector<std::string> leaves = leavesPSet.getParameterNamesForType<std::string>();
230  std::string maxName = "N" + branches[b];
231  for (uint l = 0; l != leaves.size(); ++l) {
232  std::string leave_expr = leavesPSet.getParameter<std::string>(leaves[l]);
233  std::string branchAlias = branches[b] + "_" + leaves[l];
234 
235  //add a branch manager for this expression on this collection
236  branches_[maxName].push_back(TreeBranch(className, src, leave_expr, order, selection, maxName, branchAlias));
237  } //loop the provided leaves
238 
239  //do it once with configuration [vstring vars = { "x:x" ,... } ] where ":"=separator
240  if (leavesPSet.exists("vars")) {
241  std::vector<std::string> leavesS = leavesPSet.getParameter<std::vector<std::string> >("vars");
242  for (uint l = 0; l != leavesS.size(); ++l) {
243  uint sep = leavesS[l].find(separator);
244  std::string name = leavesS[l].substr(0, sep);
245  //removes spaces from the variable name
246  /*uint*/ int space = name.find(" ");
247  while (space != -1 /*std::string::npos*/) {
248  std::string first = name.substr(0, space);
249  std::string second = name.substr(space + 1);
250  name = first + second;
251  space = name.find(" ");
252  }
253  std::string expr = leavesS[l].substr(sep + 1);
254  std::string branchAlias = branches[b] + "_" + name;
255 
256  //add a branch manager for this expression on this collection
257  branches_[maxName].push_back(TreeBranch(className, src, expr, order, selection, maxName, branchAlias));
258  }
259  }
260 
261  } //loop the provided branches
262 
263  ev_ = new uint64_t;
264  run_ = new uint;
265  lumiblock_ = new uint;
266  experimentType_ = new uint;
267  bunchCrossing_ = new uint;
268  orbitNumber_ = new uint;
269  weight_ = new float;
270  model_params_ = new std::string;
271 
272  if (branchesPSet.exists("useTFileService"))
273  useTFileService_ = branchesPSet.getParameter<bool>("useTFileService");
274  else
275  useTFileService_ = iConfig.getParameter<bool>("useTFileService");
276 
277  if (useTFileService_) {
278  if (branchesPSet.exists("treeName")) {
279  treeName_ = branchesPSet.getParameter<std::string>("treeName");
280  ownTheTree_ = true;
281  } else {
282  treeName_ = iConfig.getParameter<std::string>("treeName");
283  ownTheTree_ = false;
284  }
285  }
286  }
287 
// Declares every output of this ntupler and returns the number of leaf branches declared
// (one per TreeBranch stored in branches_).
//  - useTFileService_ == true : books branches on a TTree (created here or looked up by name).
//  - useTFileService_ == false: declares EDM products through producesCollector.
// NOTE(review): `fs` is declared on a line missing from this listing.
288  uint registerleaves(edm::ProducesCollector producesCollector) override {
289  uint nLeaves = 0;
290 
291  if (useTFileService_) {
 // Either create the tree, or try to reuse an object already stored under treeName_.
293  if (ownTheTree_) {
 // NOTE(review): this assignment is redundant, the branch is only entered when the flag is already true
294  ownTheTree_ = true;
295  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
296  } else {
297  TObject* object = fs->file().Get(treeName_.c_str());
298  if (!object) {
 // nothing stored under that name yet: create the tree and take ownership
299  ownTheTree_ = true;
300  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
301  } else {
302  tree_ = dynamic_cast<TTree*>(object);
303  if (!tree_) {
 // an object exists under that name but it is not a TTree: create a new tree and own it
304  ownTheTree_ = true;
305  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
306  } else
307  ownTheTree_ = false;
308  }
309  }
310 
311  //reserve memory for the indexes
 // one slot per entry of branches_; allocated with new[] (array form)
312  indexDataHolder_ = new uint[branches_.size()];
313  // loop the automated leafer
314  Branches::iterator iB = branches_.begin();
315  Branches::iterator iB_end = branches_.end();
316  uint indexOfIndexInDataHolder = 0;
317  for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
318  //create a branch for the index: an integer
319  tree_->Branch(iB->first.c_str(), &(indexDataHolder_[indexOfIndexInDataHolder]), (iB->first + "/i").c_str());
320  //loop on the "leaves"
321  std::vector<TreeBranch>::iterator iL = iB->second.begin();
322  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
323  for (; iL != iL_end; ++iL) {
324  TreeBranch& b = *iL;
325  //create a branch for the leaves: vector of floats
 // the tree is given the address of the TreeBranch's pointer slot, not of a vector
326  TBranch* br = tree_->Branch(b.branchAlias().c_str(), "std::vector<float>", iL->dataHolderPtrAdress());
327  br->SetTitle(b.branchTitle().c_str());
328  nLeaves++;
329  }
330  }
331 
332  //extra leaves for event info.
333  tree_->Branch("run", run_, "run/i");
334  tree_->Branch("event", ev_, "event/l");
335  tree_->Branch("lumiblock", lumiblock_, "lumiblock/i");
336  tree_->Branch("experimentType", experimentType_, "experimentType/i");
337  tree_->Branch("bunchCrossing", bunchCrossing_, "bunchCrossing/i");
338  tree_->Branch("orbitNumber", orbitNumber_, "orbitNumber/i");
 // NOTE(review): the leaflist type code here is lowercase 'f'; in ROOT the 32-bit float code
 // is uppercase 'F' - confirm which one is intended for weight_.
339  tree_->Branch("weight", weight_, "weight/f");
340  tree_->Branch("model_params", &model_params_);
341 
342  } else {
343  // loop the automated leafer
344  Branches::iterator iB = branches_.begin();
345  Branches::iterator iB_end = branches_.end();
346  for (; iB != iB_end; ++iB) {
347  //the index. should produce it only once
348  // a simple uint for the index
349  producesCollector.produces<uint>(iB->first).setBranchAlias(iB->first);
350  std::vector<TreeBranch>::iterator iL = iB->second.begin();
351  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
352  for (; iL != iL_end; ++iL) {
353  TreeBranch& b = *iL;
354  //a vector of float for each leaf
 // instance name is branchName(), alias is branchAlias()
355  producesCollector.produces<std::vector<float> >(b.branchName()).setBranchAlias(b.branchAlias());
356  nLeaves++;
357  }
358  }
359  }
360  return nLeaves;
361  }
362 
// Evaluates every configured TreeBranch on the event and publishes the results.
//  - useTFileService_ == true : hands each result vector to its TreeBranch pointer slot,
//    records the per-collection maximum size, fills the event-info values and calls
//    tree_->Fill() only when this instance owns the tree.
//  - useTFileService_ == false: puts each vector and the per-collection maximum size
//    into the event.
// NOTE(review): `product` is declared on a line missing from this listing.
363  void fill(edm::Event& iEvent) override {
364  // if (!edm::Service<UpdaterService>()->checkOnce("StringBasedNTupler::fill")) return;
365  //well if you do that, you cannot have two ntupler of the same type in the same job...
366 
367  if (useTFileService_) {
368  // loop the automated leafer
369  Branches::iterator iB = branches_.begin();
370  Branches::iterator iB_end = branches_.end();
371  uint indexOfIndexInDataHolder = 0;
372  for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
373  std::vector<TreeBranch>::iterator iL = iB->second.begin();
374  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
375  uint maxS = 0;
376  for (; iL != iL_end; ++iL) {
377  TreeBranch& b = *iL;
378  // grab the vector of values from the interpretation of expression for the associated collection
379  std::unique_ptr<std::vector<float> > branch(b.branch(iEvent));
380  // calculate the maximum index size.
381  if (branch->size() > maxS)
382  maxS = branch->size();
383  // transfer of (no copy) pointer to the vector of float from the std::unique_ptr to the tree data pointer
384  b.assignDataHolderPtr(branch.release());
385  // for memory tracing, object b is holding the data (not std::unique_ptr) and should delete it for each event (that's not completely optimum)
386  }
387  //assign the maximum vector size for this collection
388  indexDataHolder_[indexOfIndexInDataHolder] = maxS;
389  }
390 
391  //fill event info.
392  *run_ = iEvent.id().run();
393  *ev_ = iEvent.id().event();
394  // *lumiblock_ = iEvent.id().luminosityBlock();
395  *lumiblock_ = iEvent.luminosityBlock();
396  *experimentType_ = iEvent.experimentType();
397  *bunchCrossing_ = iEvent.bunchCrossing();
398  *orbitNumber_ = iEvent.orbitNumber();
399 
 // weight defaults to 1; for non-real-data events it is read from the "generator" product
400  *weight_ = 1;
401  if (!iEvent.isRealData()) {
402  edm::Handle<GenEventInfoProduct> wgeneventinfo;
 // NOTE(review): the result of getByLabel is ignored and the handle is dereferenced
 // unconditionally on the next line - confirm the product is always present here.
403  iEvent.getByLabel("generator", wgeneventinfo);
404  *weight_ = wgeneventinfo->weight();
405  }
406 
407  typedef std::vector<std::string>::const_iterator comments_const_iterator;
408  // using namespace edm;
409 
 // model_params_ defaults to "NULL"; when the "source" product is found, it is set to the
 // last comment string that contains "model"
411  *model_params_ = "NULL";
412  if (iEvent.getByLabel("source", product)) {
413  comments_const_iterator c_begin = product->comments_begin();
414  comments_const_iterator c_end = product->comments_end();
415 
416  for (comments_const_iterator cit = c_begin; cit != c_end; ++cit) {
417  size_t found = (*cit).find("model");
418  if (found != std::string::npos) {
419  //std::cout << *cit << std::endl;
420  *model_params_ = *cit;
421  }
422  }
423  }
424 
 // only the instance that owns the tree writes the entry
425  if (ownTheTree_) {
426  tree_->Fill();
427  }
428  } else {
429  // loop the automated leafer
430  Branches::iterator iB = branches_.begin();
431  Branches::iterator iB_end = branches_.end();
432  for (; iB != iB_end; ++iB) {
433  std::vector<TreeBranch>::iterator iL = iB->second.begin();
434  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
435  uint maxS = 0;
436  for (; iL != iL_end; ++iL) {
437  TreeBranch& b = *iL;
438  std::unique_ptr<std::vector<float> > branch(b.branch(iEvent));
439  if (branch->size() > maxS)
440  maxS = branch->size();
 // ownership of the vector moves to the event, under the instance name branchName()
441  iEvent.put(std::move(branch), b.branchName());
442  }
443  //index should be put only once per branch. does not really matter for edm root files
444  iEvent.put(std::make_unique<uint>(maxS), iB->first);
445  }
446  }
447  }
448 
449  void callBack() {
450  if (useTFileService_) {
451  Branches::iterator iB = branches_.begin();
452  Branches::iterator iB_end = branches_.end();
453  //de-allocate memory now: allocated in branch(...) and released to the pointer.
454  for (; iB != iB_end; ++iB) {
455  std::vector<TreeBranch>::iterator iL = iB->second.begin();
456  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
457  for (; iL != iL_end; ++iL) {
458  TreeBranch& b = *iL;
459  delete b.dataHolderPtr();
460  }
461  }
462  }
463  }
464 
465  ~StringBasedNTupler() override {
466  delete indexDataHolder_;
467  delete ev_;
468  delete run_;
469  delete lumiblock_;
470  delete experimentType_;
471  delete bunchCrossing_;
472  delete orbitNumber_;
473  delete weight_;
474  delete model_params_;
475  }
476 
477 protected:
478  typedef std::map<std::string, std::vector<TreeBranch> > Branches;
479  Branches branches_;
480 
484 
485  //event info
492  float* weight_;
494 };
495 
496 #endif
const std::string & branchTitle() const
#define LogDebug(id)
RunNumber_t run() const
Definition: EventID.h:38
T getParameter(std::string const &) const
EventNumber_t event() const
Definition: EventID.h:40
T getUntrackedParameter(std::string const &, T const &) const
string separator
Definition: mps_merge.py:79
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:131
const std::string & branchAlias() const
std::vector< float > * dataHolderPtr()
const std::string branchName() const
ProductRegistryHelper::BranchAliasSetterT< ProductType > produces()
void fill(edm::Event &iEvent) override
value branch(const edm::Event &iEvent)
std::string order_
bool exists(std::string const &parameterName) const
checks if a parameter exists
int bunchCrossing() const
Definition: EventBase.h:64
def replace(string, replacements)
edm::LuminosityBlockNumber_t luminosityBlock() const
Definition: EventBase.h:61
T * make(const Args &...args) const
make new ROOT object
Definition: TFileService.h:64
std::string branchTitle_
std::string maxIndexName_
TreeBranch::value value
StringLeaveHelper(const TreeBranch &B, const edm::Event &iEvent)
const std::string & selection() const
double weight() const
bool isRealData() const
Definition: EventBase.h:62
std::string encode() const
Definition: InputTag.cc:159
std::vector< float > * dataHolderPtr_
const std::string & order() const
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:168
std::string * model_params_
U second(std::pair< T, U > const &p)
int iEvent
Definition: GenABIO.cc:224
const edm::InputTag & src() const
comments_const_iterator comments_begin() const
std::map< std::string, std::vector< TreeBranch > > Branches
~StringBasedNTupler() override
TreeBranch(std::string C, edm::InputTag S, std::string E, std::string O, std::string SE, std::string Mi, std::string Ba)
std::string branchAlias_
int orbitNumber() const
Definition: EventBase.h:65
static const std::string B
std::unique_ptr< std::vector< float > > value
std::string expr_
bool getByLabel(InputTag const &tag, Handle< PROD > &result) const
Definition: Event.h:488
TreeBranch::value value
uint registerleaves(edm::ProducesCollector producesCollector) override
edm::InputTag src_
void assignDataHolderPtr(std::vector< float > *data)
bool failedToGet() const
Definition: HandleBase.h:72
TFile & file() const
return opened TFile
Definition: TFileService.h:37
unsigned long long uint64_t
Definition: Time.h:13
double b
Definition: hdecay.h:118
std::string selection_
const std::string & className() const
edm::EventID id() const
Definition: EventBase.h:59
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
comments_const_iterator comments_end() const
edm::EventAuxiliary::ExperimentType experimentType() const
Definition: EventBase.h:63
const std::string & maxIndexName() const
std::string class_
size_t getParameterSetNames(std::vector< std::string > &output, bool trackiness=true) const
const std::string & expr() const
StringBranchHelper(const TreeBranch &B, const edm::Event &iEvent)
def move(src, dest)
Definition: eostools.py:511
StringBasedNTupler(const edm::ParameterSet &iConfig)
std::vector< float > ** dataHolderPtrAdress()