CMS 3D CMS Logo

StringBasedNTupler.h
Go to the documentation of this file.
1 #ifndef StringBasedNTupler_NTupler_H
2 #define StringBasedNTupler_NTupler_H
3 
7 
9 #include "TTree.h"
10 #include "TBranch.h"
11 #include "TFile.h"
12 
14 
21 
23 
26 
27 #include <memory>
28 #include <string>
29 #include <sstream>
30 
35 
36 // LHE Event
38 
 // Describes one output branch: a string expression (expr_) evaluated on
 // objects of a given class (class_) read from an input tag (src_), with an
 // optional ordering expression (order_) and selection cut (selection_).
 // It also carries the raw pointer through which the per-event vector of
 // values is exposed (dataHolderPtr_).
39 class TreeBranch {
40 public:
  // Default constructor: every listed string member is set to the empty string.
  // NOTE(review): dataHolderPtr_ does not appear in this initializer list.
41  TreeBranch() : class_(""), expr_(""), order_(""), selection_(""), maxIndexName_(""), branchAlias_("") {}
  // NOTE(review): the signature line of this constructor is not visible in this
  // listing; the initializer list below consumes arguments named
  // C, S, E, O, SE, Mi and Ba.
44  : class_(C), src_(S), expr_(E), order_(O), selection_(SE), maxIndexName_(Mi), branchAlias_(Ba) {
  // Build a human-readable title from the expression, class and encoded input tag.
45  branchTitle_ = E + " calculated on " + C + " object from " + S.encode();
  // The ordering and selection parts are appended only when they are non-empty.
46  if (!O.empty())
47  branchTitle_ += " ordered according to " + O;
48  if (!SE.empty())
49  branchTitle_ += " selecting on " + SE;
  // Log the alias -> title mapping at info level.
50  edm::LogInfo("TreeBranch") << "the branch with alias: " << branchAlias_ << " corresponds to: " << branchTitle_;
51  }
52 
  // Read-only accessors for the configuration strings and the input tag.
53  const std::string& className() const { return class_; }
54  const edm::InputTag& src() const { return src_; }
55  const std::string& expr() const { return expr_; }
56  const std::string& order() const { return order_; }
57  const std::string& selection() const { return selection_; }
58  const std::string& maxIndexName() const { return maxIndexName_; }
  // Returns, by value, a copy of `name` in which every '_' has been replaced by '0'.
  // NOTE(review): the declaration of `name` is not visible in this listing --
  // presumably it is initialised from the branch alias; confirm.
59  const std::string branchName() const {
61  std::replace(name.begin(), name.end(), '_', '0');
62  return std::string(name);
63  }
64  const std::string& branchAlias() const { return branchAlias_; }
65  const std::string& branchTitle() const { return branchTitle_; }
  // Owning handle type for the vector of values computed for one event.
66  typedef std::unique_ptr<std::vector<float>> value;
  // Declared here only; the definition is not part of this listing.
67  value branch(const edm::Event& iEvent);
68 
  // Raw access to the data-holder pointer: its address, its current value, and a setter.
  // None of these three members allocate or free anything themselves.
69  std::vector<float>** dataHolderPtrAdress() { return &dataHolderPtr_; }
70  std::vector<float>* dataHolderPtr() { return dataHolderPtr_; }
71  void assignDataHolderPtr(std::vector<float>* data) { dataHolderPtr_ = data; }
72 
73 private:
  // NOTE(review): the declarations of the other members used above
  // (class_, src_, expr_, ...) are not visible in this listing.
82 
  // Raw pointer to the current vector of values; set through assignDataHolderPtr().
83  std::vector<float>* dataHolderPtr_;
84 };
85 
 // Helper that evaluates the expression of a TreeBranch on a single Object
 // fetched from the event and stores the result in value_.
 // NOTE(review): the class header, the constructor signature, the declaration
 // of the handle `oH` and the declaration of `value_` are not visible in this
 // listing; the debug-log category below suggests the class is
 // StringLeaveHelper -- confirm.
86 template <typename Object>
88 public:
91 
  // Value stored when the expression cannot be evaluated.
93  const float defaultValue = 0.;
94  // grab the object
96  iEvent.getByLabel(B.src(), oH);
97  //empty vector if product not found
98  if (oH.failedToGet()) {
99  edm::LogError("StringBranchHelper") << "cannot open: " << B.src();
  // Constructed with size 0, i.e. an empty vector.
100  value_ = std::make_unique<std::vector<float>>(0);
101  } else {
102  //parser for the object expression
103  StringObjectFunction<Object> expr(B.expr());
104  //allocate enough memory for the data holder
  // Exactly one element: this helper handles a single object, not a collection.
105  value_ = std::make_unique<std::vector<float>>(1);
106  try {
107  (*value_)[0] = (expr)(*oH);
108  } catch (...) {
  // Any exception thrown by the evaluation is swallowed; the slot gets defaultValue.
109  LogDebug("StringLeaveHelper") << "could not evaluate expression: " << B.expr()
110  << " on class: " << B.className();
111  (*value_)[0] = defaultValue;
112  }
113  }
114  }
115 
116 private:
118 };
119 
 // Helper that evaluates the expression of a TreeBranch on every element of a
 // collection fetched from the event, optionally after sorting the elements
 // and/or filtering them with a selection cut, and stores the values in value_.
 // NOTE(review): the class header, the constructor signature and the
 // declarations of `oH`, `selection`, `order` and `value_` are not visible in
 // this listing; the log category below suggests the class is
 // StringBranchHelper -- confirm.
120 template <typename Object, typename Collection = std::vector<Object>>
122 public:
125 
  // Value pushed when the expression (or the selection) throws for an element.
127  const float defaultValue = 0.;
128 
129  // grab the collection
131  iEvent.getByLabel(B.src(), oH);
132 
133  //empty vector if product not found
134  if (oH.failedToGet()) {
135  if (!(iEvent.isRealData() && B.className() == "reco::GenParticle")) { //don't output genparticle error in data
136  edm::LogError("StringBranchHelper") << "cannot open: " << B.src() << " " << B.className();
137  }
138  value_ = std::make_unique<std::vector<float>>();
139  } else {
140  //parser for the object expression
141  StringObjectFunction<Object> expr(B.expr());
142  //allocate enough memory for the data holder
143  value_ = std::make_unique<std::vector<float>>();
  // Capacity only: the final size can be smaller when a selection rejects elements.
144  value_->reserve(oH->size());
145 
  // The selector is created only when a selection string was configured.
  // NOTE(review): `selection` is a raw pointer allocated with new and deleted at
  // the end of this branch; its declaration/initial value is not visible here.
147  if (!B.selection().empty()) {
148  //std::cout<<"trying to get to a selection"<<std::endl;
149  selection = new StringCutObjectSelector<Object>(B.selection());
150  //std::cout<<"got the objet"<<std::endl;
151  }
152  uint i_end = oH->size();
153  //sort things first if requested
154  if (!B.order().empty()) {
  // NOTE(review): `order` (passed by address to sortByStringFunction below) is
  // declared on a line that is not visible in this listing.
156  // allocate a vector of pointers (we are using view) to be sorted
157  std::vector<const Object*> copyToSort(oH->size());
158  for (uint i = 0; i != i_end; ++i)
159  copyToSort[i] = &(*oH)[i];
160  std::sort(copyToSort.begin(), copyToSort.end(), sortByStringFunction<Object>(&order));
161  //then loop and fill
162  for (uint i = 0; i != i_end; ++i) {
163  //try and catch is necessary because ...
164  try {
  // Elements rejected by the selection are skipped: nothing is pushed for them.
165  if (selection && !((*selection)(*(copyToSort)[i])))
166  continue;
167  value_->push_back((expr)(*(copyToSort)[i]));
168  } catch (...) {
169  LogDebug("StringBranchHelper")
170  << "with sorting. could not evaluate expression: " << B.expr() << " on class: " << B.className();
171  value_->push_back(defaultValue); //push a default value to not change the indexing
172  }
173  }
174  } else {
175  //actually fill the vector of values
176  for (uint i = 0; i != i_end; ++i) {
177  //try and catch is necessary because ...
178  try {
  // Same skip-on-reject logic as the sorted path, applied in collection order.
179  if (selection && !((*selection)((*oH)[i])))
180  continue;
181  value_->push_back((expr)((*oH)[i]));
182  } catch (...) {
183  LogDebug("StringBranchHelper")
184  << "could not evaluate expression: " << B.expr() << " on class: " << B.className();
185  value_->push_back(defaultValue); //push a default value to not change the indexing
186  }
187  }
188  }
  // Release the selector allocated above (if any).
189  if (selection)
190  delete selection;
191  }
192  }
193 
194 private:
196 };
197 
198 class StringBasedNTupler : public NTupler {
199 public:
  // Constructor body: reads the "branchesPSet" configuration, creates one
  // TreeBranch per configured leaf, allocates the event-info holders and reads
  // the TFileService/tree-name options.
  // NOTE(review): the constructor's signature line is not visible in this
  // listing; the body reads its configuration from `iConfig`.
201  edm::ParameterSet branchesPSet = iConfig.getParameter<edm::ParameterSet>("branchesPSet");
202  std::vector<std::string> branches;
  // Each nested parameter set inside branchesPSet describes one group of branches.
203  branchesPSet.getParameterSetNames(branches);
  // Separator used by the "vars" syntax below; defaults to ":".
204  const std::string separator = branchesPSet.getUntrackedParameter<std::string>("separator", ":");
205  for (uint b = 0; b != branches.size(); ++b) {
206  edm::ParameterSet bPSet = branchesPSet.getParameter<edm::ParameterSet>(branches[b]);
  // The class name may be given under either "class" or "Class".
207  std::string className = "";
208  if (bPSet.exists("class"))
209  className = bPSet.getParameter<std::string>("class");
210  else
211  className = bPSet.getParameter<std::string>("Class");
212  edm::InputTag src = edm::Service<InputTagDistributorService>()->retrieve("src", bPSet);
213  edm::ParameterSet leavesPSet = bPSet.getParameter<edm::ParameterSet>("leaves");
  // "order" and "selection" are optional; they stay empty when absent.
214  std::string order = "";
215  if (bPSet.exists("order"))
216  order = bPSet.getParameter<std::string>("order");
217  std::string selection = "";
218  if (bPSet.exists("selection"))
219  selection = bPSet.getParameter<std::string>("selection");
220  // do it one by one with configuration [string x = "x"]
221  std::vector<std::string> leaves = leavesPSet.getParameterNamesForType<std::string>();
  // Key under which this group's branches are stored, also passed to TreeBranch as its max-index name.
222  std::string maxName = "N" + branches[b];
223  for (uint l = 0; l != leaves.size(); ++l) {
224  std::string leave_expr = leavesPSet.getParameter<std::string>(leaves[l]);
  // Alias is "<group>_<leaf name>".
225  std::string branchAlias = branches[b] + "_" + leaves[l];
226 
227  //add a branch manager for this expression on this collection
228  branches_[maxName].push_back(TreeBranch(className, src, leave_expr, order, selection, maxName, branchAlias));
229  } //loop the provided leaves
230 
231  //do it once with configuration [vstring vars = { "x:x" ,... } ] where ":"=separator
232  if (leavesPSet.exists("vars")) {
233  std::vector<std::string> leavesS = leavesPSet.getParameter<std::vector<std::string>>("vars");
234  for (uint l = 0; l != leavesS.size(); ++l) {
  // NOTE(review): std::string::find returns std::string::npos when the separator
  // is absent; the result is stored in a uint here, so that case is not handled
  // explicitly -- confirm the intended behaviour for entries without a separator.
235  uint sep = leavesS[l].find(separator);
236  std::string name = leavesS[l].substr(0, sep);
237  //removes spaces from the variable name
  // The "not found" result of find() is compared against -1 after conversion to int.
238  /*uint*/ int space = name.find(' ');
239  while (space != -1 /*std::string::npos*/) {
240  std::string first = name.substr(0, space);
241  std::string second = name.substr(space + 1);
242  name = first + second;
243  space = name.find(' ');
244  }
  // Everything after the separator is the expression to evaluate.
245  std::string expr = leavesS[l].substr(sep + 1);
246  std::string branchAlias = branches[b] + "_" + name;
247 
248  //add a branch manager for this expression on this collection
249  branches_[maxName].push_back(TreeBranch(className, src, expr, order, selection, maxName, branchAlias));
250  }
251  }
252 
253  } //loop the provided branches
254 
  // Heap-allocated holders for the per-event information; released in the destructor.
255  ev_ = new uint64_t;
256  run_ = new uint;
257  lumiblock_ = new uint;
258  experimentType_ = new uint;
259  bunchCrossing_ = new uint;
260  orbitNumber_ = new uint;
261  weight_ = new float;
263 
  // "useTFileService" is taken from branchesPSet when present there, otherwise from iConfig.
264  if (branchesPSet.exists("useTFileService"))
265  useTFileService_ = branchesPSet.getParameter<bool>("useTFileService");
266  else
267  useTFileService_ = iConfig.getParameter<bool>("useTFileService");
268 
  // A tree name given in branchesPSet marks the tree as owned by this ntupler;
  // a name taken from iConfig does not.
  // NOTE(review): treeName_ and ownTheTree_ are assigned only when useTFileService_ is true.
269  if (useTFileService_) {
270  if (branchesPSet.exists("treeName")) {
271  treeName_ = branchesPSet.getParameter<std::string>("treeName");
272  ownTheTree_ = true;
273  } else {
274  treeName_ = iConfig.getParameter<std::string>("treeName");
275  ownTheTree_ = false;
276  }
277  }
278  }
279 
  // Declares the output of this ntupler and returns the number of leaves
  // (one per TreeBranch) that were registered.
  // With useTFileService_: creates or reuses the TTree named treeName_ and
  // attaches one index branch per group, one vector<float> branch per
  // TreeBranch, plus the event-info branches.
  // Otherwise: registers the equivalent products on producesCollector.
280  uint registerleaves(edm::ProducesCollector producesCollector) override {
281  uint nLeaves = 0;
282 
283  if (useTFileService_) {
  // NOTE(review): `fs` is used below but its declaration is not visible in this listing.
285  if (ownTheTree_) {
  // ownTheTree_ is already true in this branch, so this assignment changes nothing.
286  ownTheTree_ = true;
287  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
288  } else {
  // Look for an existing object with that name in the output file.
289  TObject* object = fs->file().Get(treeName_.c_str());
290  if (!object) {
  // Nothing found: create the tree here and take ownership of filling it.
291  ownTheTree_ = true;
292  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
293  } else {
294  tree_ = dynamic_cast<TTree*>(object);
295  if (!tree_) {
  // An object exists but is not a TTree: create a new tree and own it.
296  ownTheTree_ = true;
297  tree_ = fs->make<TTree>(treeName_.c_str(), "StringBasedNTupler tree");
298  } else
299  ownTheTree_ = false;
300  }
301  }
302 
303  //reserve memory for the indexes
  // Array allocation: one uint slot per entry of branches_.
304  indexDataHolder_ = new uint[branches_.size()];
305  // loop the automated leafer
306  Branches::iterator iB = branches_.begin();
307  Branches::iterator iB_end = branches_.end();
308  uint indexOfIndexInDataHolder = 0;
309  for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
310  //create a branch for the index: an integer
311  tree_->Branch(iB->first.c_str(), &(indexDataHolder_[indexOfIndexInDataHolder]), (iB->first + "/i").c_str());
312  //loop on the "leaves"
313  std::vector<TreeBranch>::iterator iL = iB->second.begin();
314  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
315  for (; iL != iL_end; ++iL) {
316  TreeBranch& b = *iL;
317  //create a branch for the leaves: vector of floats
  // The tree is given the address of the TreeBranch's data-holder pointer.
318  TBranch* br = tree_->Branch(b.branchAlias().c_str(), "std::vector<float>", iL->dataHolderPtrAdress());
319  br->SetTitle(b.branchTitle().c_str());
320  nLeaves++;
321  }
322  }
323 
324  //extra leaves for event info.
325  tree_->Branch("run", run_, "run/i");
326  tree_->Branch("event", ev_, "event/l");
327  tree_->Branch("lumiblock", lumiblock_, "lumiblock/i");
328  tree_->Branch("experimentType", experimentType_, "experimentType/i");
329  tree_->Branch("bunchCrossing", bunchCrossing_, "bunchCrossing/i");
330  tree_->Branch("orbitNumber", orbitNumber_, "orbitNumber/i");
331  tree_->Branch("weight", weight_, "weight/f");
332  tree_->Branch("model_params", &model_params_);
333 
334  } else {
335  // loop the automated leafer
336  Branches::iterator iB = branches_.begin();
337  Branches::iterator iB_end = branches_.end();
338  for (; iB != iB_end; ++iB) {
339  //the index. should produce it only once
340  // a simple uint for the index
341  producesCollector.produces<uint>(iB->first).setBranchAlias(iB->first);
342  std::vector<TreeBranch>::iterator iL = iB->second.begin();
343  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
344  for (; iL != iL_end; ++iL) {
345  TreeBranch& b = *iL;
346  //a vector of float for each leave
  // The product instance name is branchName(); the alias is set to branchAlias().
347  producesCollector.produces<std::vector<float>>(b.branchName()).setBranchAlias(b.branchAlias());
348  nLeaves++;
349  }
350  }
351  }
352  return nLeaves;
353  }
354 
  // Processes one event.
  // With useTFileService_: evaluates every TreeBranch, hands each resulting
  // vector to its TreeBranch as a raw pointer, records the per-group maximum
  // size and the event information, then calls tree_->Fill() if ownTheTree_.
  // Otherwise: puts each vector and the per-group maximum size into the event.
355  void fill(edm::Event& iEvent) override {
356  // if (!edm::Service<UpdaterService>()->checkOnce("StringBasedNTupler::fill")) return;
357  //well if you do that, you cannot have two ntupler of the same type in the same job...
358 
359  if (useTFileService_) {
360  // loop the automated leafer
361  Branches::iterator iB = branches_.begin();
362  Branches::iterator iB_end = branches_.end();
363  uint indexOfIndexInDataHolder = 0;
364  for (; iB != iB_end; ++iB, ++indexOfIndexInDataHolder) {
365  std::vector<TreeBranch>::iterator iL = iB->second.begin();
366  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
367  uint maxS = 0;
368  for (; iL != iL_end; ++iL) {
369  TreeBranch& b = *iL;
370  // grab the vector of values from the interpretation of expression for the associated collection
371  std::unique_ptr<std::vector<float>> branch(b.branch(iEvent));
372  // calculate the maximum index size.
373  if (branch->size() > maxS)
374  maxS = branch->size();
375  // transfer of (no copy) pointer to the vector of float from the std::unique_ptr to the tree data pointer
  // release() gives up ownership, so the vector must be deleted explicitly later (callBack() does that).
376  b.assignDataHolderPtr(branch.release());
377  // for memory tracing, object b is holding the data (not std::unique_ptr) and should delete it for each event (that's not completely optimum)
378  }
379  //assigne the maximum vector size for this collection
380  indexDataHolder_[indexOfIndexInDataHolder] = maxS;
381  }
382 
383  //fill event info.
384  *run_ = iEvent.id().run();
385  *ev_ = iEvent.id().event();
386  // *lumiblock_ = iEvent.id().luminosityBlock();
387  *lumiblock_ = iEvent.luminosityBlock();
388  *experimentType_ = iEvent.experimentType();
389  *bunchCrossing_ = iEvent.bunchCrossing();
390  *orbitNumber_ = iEvent.orbitNumber();
391 
  // The weight is 1 for real data; otherwise it is read from the "generator" product.
  // NOTE(review): the handle is dereferenced without checking the result of getByLabel.
392  *weight_ = 1;
393  if (!iEvent.isRealData()) {
394  edm::Handle<GenEventInfoProduct> wgeneventinfo;
395  iEvent.getByLabel("generator", wgeneventinfo);
396  *weight_ = wgeneventinfo->weight();
397  }
398 
399  typedef std::vector<std::string>::const_iterator comments_const_iterator;
400  // using namespace edm;
401 
  // NOTE(review): `product` is used below but its declaration is not visible in this listing.
  // model_params_ defaults to "NULL" and is overwritten by every comment of the
  // "source" product that contains "model", so the last such comment is kept.
403  *model_params_ = "NULL";
404  if (iEvent.getByLabel("source", product)) {
405  comments_const_iterator c_begin = product->comments_begin();
406  comments_const_iterator c_end = product->comments_end();
407 
408  for (comments_const_iterator cit = c_begin; cit != c_end; ++cit) {
409  size_t found = (*cit).find("model");
410  if (found != std::string::npos) {
411  //std::cout << *cit << std::endl;
412  *model_params_ = *cit;
413  }
414  }
415  }
416 
  // The tree entry is written only when this ntupler owns the tree.
417  if (ownTheTree_) {
418  tree_->Fill();
419  }
420  } else {
421  // loop the automated leafer
422  Branches::iterator iB = branches_.begin();
423  Branches::iterator iB_end = branches_.end();
424  for (; iB != iB_end; ++iB) {
425  std::vector<TreeBranch>::iterator iL = iB->second.begin();
426  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
427  uint maxS = 0;
428  for (; iL != iL_end; ++iL) {
429  TreeBranch& b = *iL;
430  std::unique_ptr<std::vector<float>> branch(b.branch(iEvent));
431  if (branch->size() > maxS)
432  maxS = branch->size();
  // Ownership of the vector moves into the event under the instance name branchName().
433  iEvent.put(std::move(branch), b.branchName());
434  }
435  //index should be put only once per branch. doe not really mattter for edm root files
436  iEvent.put(std::make_unique<uint>(maxS), iB->first);
437  }
438  }
439  }
440 
441  void callBack() {
442  if (useTFileService_) {
443  Branches::iterator iB = branches_.begin();
444  Branches::iterator iB_end = branches_.end();
445  //de-allocate memory now: allocated in branch(...) and released to the pointer.
446  for (; iB != iB_end; ++iB) {
447  std::vector<TreeBranch>::iterator iL = iB->second.begin();
448  std::vector<TreeBranch>::iterator iL_end = iB->second.end();
449  for (; iL != iL_end; ++iL) {
450  TreeBranch& b = *iL;
451  delete b.dataHolderPtr();
452  }
453  }
454  }
455  }
456 
457  ~StringBasedNTupler() override {
458  delete indexDataHolder_;
459  delete ev_;
460  delete run_;
461  delete lumiblock_;
462  delete experimentType_;
463  delete bunchCrossing_;
464  delete orbitNumber_;
465  delete weight_;
466  delete model_params_;
467  }
468 
469 protected:
470  typedef std::map<std::string, std::vector<TreeBranch>> Branches;
472 
476 
477  //event info
484  float* weight_;
486 };
487 
488 #endif
const edm::InputTag & src() const
string separator
Definition: mps_merge.py:79
T getParameter(std::string const &) const
Definition: ParameterSet.h:307
TTree * tree_
Definition: NTupler.h:28
Definition: APVGainStruct.h:7
std::vector< float > * dataHolderPtr()
const std::string branchName() const
ProductRegistryHelper::BranchAliasSetterT< ProductType > produces()
void fill(edm::Event &iEvent) override
value branch(const edm::Event &iEvent)
bool exists(std::string const &parameterName) const
checks if a parameter exists
std::string order_
selection
main part
Definition: corrVsCorr.py:100
const std::string & className() const
def replace(string, replacements)
const std::string & expr() const
std::string branchTitle_
std::string maxIndexName_
TreeBranch::value value
StringLeaveHelper(const TreeBranch &B, const edm::Event &iEvent)
Log< level::Error, false > LogError
std::vector< float > * dataHolderPtr_
size_t getParameterSetNames(std::vector< std::string > &output, bool trackiness=true) const
T getUntrackedParameter(std::string const &, T const &) const
std::string * model_params_
U second(std::pair< T, U > const &p)
bool failedToGet() const
Definition: HandleBase.h:72
int iEvent
Definition: GenABIO.cc:224
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:180
std::map< std::string, std::vector< TreeBranch > > Branches
const std::string & branchAlias() const
~StringBasedNTupler() override
const std::string & selection() const
TreeBranch(std::string C, edm::InputTag S, std::string E, std::string O, std::string SE, std::string Mi, std::string Ba)
std::string branchAlias_
Definition: value.py:1
std::unique_ptr< std::vector< float > > value
std::string expr_
TreeBranch::value value
const std::string & branchTitle() const
uint registerleaves(edm::ProducesCollector producesCollector) override
edm::InputTag src_
void assignDataHolderPtr(std::vector< float > *data)
Log< level::Info, false > LogInfo
comments_const_iterator comments_begin() const
unsigned long long uint64_t
Definition: Time.h:13
double b
Definition: hdecay.h:120
const std::string & order() const
std::string selection_
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
const std::string & maxIndexName() const
std::string class_
bool useTFileService_
Definition: NTupler.h:27
StringBranchHelper(const TreeBranch &B, const edm::Event &iEvent)
def move(src, dest)
Definition: eostools.py:511
std::string className(const T &t)
Definition: ClassName.h:31
StringBasedNTupler(const edm::ParameterSet &iConfig)
comments_const_iterator comments_end() const
#define LogDebug(id)
std::vector< float > ** dataHolderPtrAdress()