CMS 3D CMS Logo

StringBasedNTupler.h
Go to the documentation of this file.
1 #ifndef StringBasedNTupler_NTupler_H
2 #define StringBasedNTupler_NTupler_H
3 
4 //#include "PhysicsTools/UtilAlgos/interface/UpdaterService.h"
5 
11 
12 //#include "PhysicsTools/UtilAlgos/interface/TFileService.h"
14 #include "TTree.h"
15 #include "TBranch.h"
16 #include "TFile.h"
17 
19 
26 
28 
31 
32 //#define StringBasedNTuplerPrecision float;
33 
34 #include <memory>
35 #include <string>
36 #include <sstream>
37 
43 
44 // LHE Event
46 
47 
48 class TreeBranch {
49  public:
52  class_(C),src_(S),expr_(E),order_(O), selection_(SE),maxIndexName_(Mi),branchAlias_(Ba){
53  branchTitle_= E+" calculated on "+C+" object from "+S.encode();
54  if (O!="") branchTitle_+=" ordered according to "+O;
55  if (SE!="") branchTitle_+=" selecting on "+SE;
56  edm::LogInfo("TreeBranch")<<"the branch with alias: "<<branchAlias_<<" corresponds to: "<<branchTitle_;
57  }
58 
59  const std::string & className() const { return class_;}
60  const edm::InputTag & src() const { return src_;}
61  const std::string & expr() const { return expr_;}
62  const std::string & order() const { return order_;}
63  const std::string & selection() const { return selection_;}
64  const std::string & maxIndexName() const { return maxIndexName_;}
65  const std::string branchName()const{
67  std::replace(name.begin(), name.end(), '_','0');
68  return std::string(name.c_str());}
69  const std::string & branchAlias()const{ return branchAlias_;}
70  const std::string & branchTitle()const{ return branchTitle_;}
71  typedef std::unique_ptr<std::vector<float> > value;
72  value branch(const edm::Event& iEvent);
73 
74  std::vector<float>** dataHolderPtrAdress() { return &dataHolderPtr_;}
75  std::vector<float>* dataHolderPtr() { return dataHolderPtr_;}
76  void assignDataHolderPtr(std::vector<float> * data) { dataHolderPtr_=data;}
77  private:
86 
87  std::vector<float> * dataHolderPtr_;
88 };
89 
90 
91 template <typename Object>
93  public:
95  value operator()() { return std::move(value_);}
96 
98  {
99  const float defaultValue = 0.;
100  // grab the object
102  iEvent.getByLabel(B.src(), oH);
103  //empty vector if product not found
104  if (oH.failedToGet()){
105  edm::LogError("StringBranchHelper")<<"cannot open: "<<B.src();
106  value_.reset(new std::vector<float>(0));
107  }
108  else{
109  //parser for the object expression
111  //allocate enough memory for the data holder
112  value_.reset(new std::vector<float>(1));
113  try{
114  (*value_)[0]=(expr)(*oH);
115  }catch(...){
116  LogDebug("StringLeaveHelper")<<"could not evaluate expression: "<<B.expr()<<" on class: "<<B.className();
117  (*value_)[0]=defaultValue;
118  }
119  }
120  }
121  private:
122  value value_;
123 };
124 
125 template <typename Object, typename Collection=std::vector<Object> >
127 public:
129  value operator()() { return std::move(value_);}
130 
132  {
133  const float defaultValue = 0.;
134 
135  // grab the collection
137  iEvent.getByLabel(B.src(), oH);
138 
139 
140  //empty vector if product not found
141  if (oH.failedToGet()){
142  if(!(iEvent.isRealData() && B.className()=="reco::GenParticle") ) { //don't output genparticle error in data
143  edm::LogError("StringBranchHelper")<<"cannot open: "<<B.src()<<" "<<B.className();
144  }
145  value_.reset(new std::vector<float>());
146  }
147  else{
148  //parser for the object expression
150  //allocate enough memory for the data holder
151  value_.reset(new std::vector<float>());
152  value_->reserve(oH->size());
153 
155  if (B.selection()!=""){
156  //std::cout<<"trying to get to a selection"<<std::endl;
157  selection = new StringCutObjectSelector<Object>(B.selection());
158  //std::cout<<"got the objet"<<std::endl;
159  }
160  uint i_end=oH->size();
161  //sort things first if requested
162  if (B.order()!=""){
164  // allocate a vector of pointers (we are using view) to be sorted
165  std::vector<const Object*> copyToSort(oH->size());
166  for (uint i=0;i!=i_end;++i) copyToSort[i]= &(*oH)[i];
167  std::sort(copyToSort.begin(), copyToSort.end(), sortByStringFunction<Object>(&order));
168  //then loop and fill
169  for (uint i=0;i!=i_end;++i) {
170  //try and catch is necessary because ...
171  try{
172  if (selection && !((*selection)(*(copyToSort)[i]))) continue;
173  value_->push_back((expr)(*(copyToSort)[i]));
174  }catch(...){
175  LogDebug("StringBranchHelper")<<"with sorting. could not evaluate expression: "<<B.expr()<<" on class: "<<B.className();
176  value_->push_back(defaultValue);//push a default value to not change the indexing
177  }
178  }
179  }
180  else{
181  //actually fill the vector of values
182  for (uint i=0;i!=i_end;++i){
183  //try and catch is necessary because ...
184  try {
185  if (selection && !((*selection)((*oH)[i]))) continue;
186  value_->push_back((expr)((*oH)[i]));
187  }catch(...){
188  LogDebug("StringBranchHelper")<<"could not evaluate expression: "<<B.expr()<<" on class: "<<B.className();
189  value_->push_back(defaultValue);//push a default value to not change the indexing
190  }
191  }
192  }
193  if (selection) delete selection;
194  }
195  }
196  private:
197  value value_;
198 };
199 
200 
201 
202 class StringBasedNTupler : public NTupler {
203 
204 
205  public:
207 
208 
209 
210  edm::ParameterSet branchesPSet = iConfig.getParameter<edm::ParameterSet>("branchesPSet");
211  std::vector<std::string> branches;
212  branchesPSet.getParameterSetNames(branches);
213  const std::string separator = branchesPSet.getUntrackedParameter<std::string>("separator",":");
214  for (uint b=0;b!=branches.size();++b){
215  edm::ParameterSet bPSet = branchesPSet.getParameter<edm::ParameterSet>(branches[b]);
217  if (bPSet.exists("class"))
218  className=bPSet.getParameter<std::string>("class");
219  else
220  className=bPSet.getParameter<std::string>("Class");
222  edm::ParameterSet leavesPSet=bPSet.getParameter<edm::ParameterSet>("leaves");
223  std::string order = "";
224  if (bPSet.exists("order")) order = bPSet.getParameter<std::string>("order");
225  std::string selection = "";
226  if (bPSet.exists("selection")) selection = bPSet.getParameter<std::string>("selection");
227  // do it one by one with configuration [string x = "x"]
228  std::vector<std::string> leaves=leavesPSet.getParameterNamesForType<std::string>();
229  std::string maxName="N"+branches[b];
230  for (uint l=0;l!=leaves.size();++l){
231  std::string leave_expr=leavesPSet.getParameter<std::string>(leaves[l]);
232  std::string branchAlias=branches[b]+"_"+leaves[l];
233 
234  //add a branch manager for this expression on this collection
235  branches_[maxName].push_back(TreeBranch(className, src, leave_expr, order, selection, maxName, branchAlias));
236  }//loop the provided leaves
237 
238  //do it once with configuration [vstring vars = { "x:x" ,... } ] where ":"=separator
239  if (leavesPSet.exists("vars")){
240  std::vector<std::string> leavesS = leavesPSet.getParameter<std::vector<std::string> >("vars");
241  for (uint l=0;l!=leavesS.size();++l){
242  uint sep=leavesS[l].find(separator);
243  std::string name=leavesS[l].substr(0,sep);
244  //removes spaces from the variable name
245  /*uint*/int space = name.find(" ");
246  while (space!=-1/*std::string::npos*/){
247  std::string first = name.substr(0,space);
248  std::string second = name.substr(space+1);
249  name = first+second;
250  space = name.find(" ");
251  }
252  std::string expr=leavesS[l].substr(sep+1);
253  std::string branchAlias=branches[b]+"_"+name;
254 
255  //add a branch manager for this expression on this collection
256  branches_[maxName].push_back(TreeBranch(className, src, expr, order, selection, maxName, branchAlias));
257  }
258  }
259 
260  }//loop the provided branches
261 
262 
263 
264 
265  ev_ = new uint64_t;
266  run_ = new uint;
267  lumiblock_ = new uint;
268  experimentType_ = new uint;
269  bunchCrossing_ = new uint;
270  orbitNumber_ = new uint;
271  weight_ = new float;
272  model_params_ = new std::string;
273 
274 
275  if (branchesPSet.exists("useTFileService"))
276  useTFileService_=branchesPSet.getParameter<bool>("useTFileService");
277  else
278  useTFileService_=iConfig.getParameter<bool>("useTFileService");
279 
280  if (useTFileService_){
281  if (branchesPSet.exists("treeName")){
282  treeName_=branchesPSet.getParameter<std::string>("treeName");
283  ownTheTree_=true;
284  }
285  else{
286  treeName_=iConfig.getParameter<std::string>("treeName");
287  ownTheTree_=false;
288  }
289  }
290  }
291 
292 
293 
295  uint nLeaves=0;
296 
297  if (useTFileService_){
299  if (ownTheTree_){
300  ownTheTree_=true;
301  tree_=fs->make<TTree>(treeName_.c_str(),"StringBasedNTupler tree");
302  }else{
303  TObject * object = fs->file().Get(treeName_.c_str());
304  if (!object){
305  ownTheTree_=true;
306  tree_=fs->make<TTree>(treeName_.c_str(),"StringBasedNTupler tree");
307  }
308  else{
309  tree_=dynamic_cast<TTree*>(object);
310  if (!tree_){
311  ownTheTree_=true;
312  tree_=fs->make<TTree>(treeName_.c_str(),"StringBasedNTupler tree");
313  }
314  else ownTheTree_=false;
315  }
316  }
317 
318  //reserve memory for the indexes
319  indexDataHolder_ = new uint[branches_.size()];
320  // loop the automated leafer
321  Branches::iterator iB=branches_.begin();
322  Branches::iterator iB_end=branches_.end();
323  uint indexOfIndexInDataHolder=0;
324  for(;iB!=iB_end;++iB,++indexOfIndexInDataHolder){
325  //create a branch for the index: an integer
326  tree_->Branch(iB->first.c_str(), &(indexDataHolder_[indexOfIndexInDataHolder]),(iB->first+"/i").c_str());
327  //loop on the "leaves"
328  std::vector<TreeBranch>::iterator iL=iB->second.begin();
329  std::vector<TreeBranch>::iterator iL_end=iB->second.end();
330  for(;iL!=iL_end;++iL){
331  TreeBranch & b=*iL;
332  //create a branch for the leaves: vector of floats
333  TBranch * br = tree_->Branch(b.branchAlias().c_str(),"std::vector<float>",iL->dataHolderPtrAdress());
334  br->SetTitle(b.branchTitle().c_str());
335  nLeaves++;
336  }
337  }
338 
339  //extra leaves for event info.
340  tree_->Branch("run",run_,"run/i");
341  tree_->Branch("event",ev_,"event/l");
342  tree_->Branch("lumiblock",lumiblock_,"lumiblock/i");
343  tree_->Branch("experimentType",experimentType_,"experimentType/i");
344  tree_->Branch("bunchCrossing",bunchCrossing_,"bunchCrossing/i");
345  tree_->Branch("orbitNumber",orbitNumber_,"orbitNumber/i");
346  tree_->Branch("weight",weight_,"weight/f");
347  tree_->Branch("model_params",&model_params_);
348 
349  }
350  else{
351  // loop the automated leafer
352  Branches::iterator iB=branches_.begin();
353  Branches::iterator iB_end=branches_.end();
354  for(;iB!=iB_end;++iB){
355  //the index. should produce it only once
356  // a simple uint for the index
357  producer->produces<uint>(iB->first).setBranchAlias(iB->first);
358  std::vector<TreeBranch>::iterator iL=iB->second.begin();
359  std::vector<TreeBranch>::iterator iL_end=iB->second.end();
360  for(;iL!=iL_end;++iL){
361  TreeBranch & b=*iL;
362  //a vector of float for each leave
363  producer->produces<std::vector<float> >(b.branchName()).setBranchAlias(b.branchAlias());
364  nLeaves++;
365  }
366  }
367  }
368  return nLeaves;
369  }
370 
372  // if (!edm::Service<UpdaterService>()->checkOnce("StringBasedNTupler::fill")) return;
373  //well if you do that, you cannot have two ntupler of the same type in the same job...
374 
375  if (useTFileService_){
376  // loop the automated leafer
377  Branches::iterator iB=branches_.begin();
378  Branches::iterator iB_end=branches_.end();
379  uint indexOfIndexInDataHolder=0;
380  for(;iB!=iB_end;++iB,++indexOfIndexInDataHolder){
381  std::vector<TreeBranch>::iterator iL=iB->second.begin();
382  std::vector<TreeBranch>::iterator iL_end=iB->second.end();
383  uint maxS=0;
384  for(;iL!=iL_end;++iL){
385  TreeBranch & b=*iL;
386  // grab the vector of values from the interpretation of expression for the associated collection
387  std::unique_ptr<std::vector<float> > branch(b.branch(iEvent));
388  // calculate the maximum index size.
389  if (branch->size()>maxS) maxS=branch->size();
390  // transfer of (no copy) pointer to the vector of float from the std::unique_ptr to the tree data pointer
391  b.assignDataHolderPtr(branch.release());
392  // for memory tracing, object b is holding the data (not std::unique_ptr) and should delete it for each event (that's not completely optimum)
393  }
394  //assigne the maximum vector size for this collection
395  indexDataHolder_[indexOfIndexInDataHolder]=maxS;
396  }
397 
398  //fill event info.
399  *run_ = iEvent.id().run();
400  *ev_ = iEvent.id().event();
401  // *lumiblock_ = iEvent.id().luminosityBlock();
402  *lumiblock_ = iEvent.luminosityBlock();
403  *experimentType_ = iEvent.experimentType();
404  *bunchCrossing_ = iEvent.bunchCrossing();
405  *orbitNumber_ = iEvent.orbitNumber();
406 
407  *weight_ = 1;
408  if(!iEvent.isRealData()) {
409  edm::Handle<GenEventInfoProduct> wgeneventinfo;
410  iEvent.getByLabel("generator", wgeneventinfo);
411  *weight_ = wgeneventinfo->weight();
412  }
413 
414  typedef std::vector<std::string>::const_iterator comments_const_iterator;
415 // using namespace edm;
416 
418  *model_params_ = "NULL";
419  if(iEvent.getByLabel("source", product)) {
420  comments_const_iterator c_begin = product->comments_begin();
421  comments_const_iterator c_end = product->comments_end();
422 
423  for( comments_const_iterator cit=c_begin; cit!=c_end; ++cit) {
424  size_t found = (*cit).find("model");
425  if( found != std::string::npos) {
426  //std::cout << *cit << std::endl;
427  *model_params_ = *cit;
428  }
429  }
430  }
431 
432 
433  if (ownTheTree_){ tree_->Fill(); }
434  }else{
435  // loop the automated leafer
436  Branches::iterator iB=branches_.begin();
437  Branches::iterator iB_end=branches_.end();
438  for(;iB!=iB_end;++iB){
439  std::vector<TreeBranch>::iterator iL=iB->second.begin();
440  std::vector<TreeBranch>::iterator iL_end=iB->second.end();
441  uint maxS=0;
442  for(;iL!=iL_end;++iL){
443  TreeBranch & b=*iL;
444  std::unique_ptr<std::vector<float> > branch(b.branch(iEvent));
445  if (branch->size()>maxS) maxS=branch->size();
446  iEvent.put(std::move(branch), b.branchName());
447  }
448  //index should be put only once per branch. doe not really mattter for edm root files
449  iEvent.put(std::make_unique<uint>(maxS), iB->first);
450  }
451  }
452  }
453 
454  void callBack()
455  {
456  if (useTFileService_){
457  Branches::iterator iB=branches_.begin();
458  Branches::iterator iB_end=branches_.end();
459  //de-allocate memory now: allocated in branch(...) and released to the pointer.
460  for(;iB!=iB_end;++iB){
461  std::vector<TreeBranch>::iterator iL=iB->second.begin();
462  std::vector<TreeBranch>::iterator iL_end=iB->second.end();
463  for(;iL!=iL_end;++iL){
464  TreeBranch & b=*iL;
465  delete b.dataHolderPtr();
466  }
467  }
468  }
469  }
470 
472  delete indexDataHolder_;
473  delete ev_;
474  delete run_;
475  delete lumiblock_;
476  delete experimentType_;
477  delete bunchCrossing_;
478  delete orbitNumber_;
479  delete weight_;
480  delete model_params_;
481  }
482 
483  protected:
484  typedef std::map<std::string, std::vector<TreeBranch> > Branches;
485  Branches branches_;
486 
490 
491  //event info
493  uint * run_;
494  uint * lumiblock_;
497  uint * orbitNumber_;
498  float * weight_;
500 
501 };
502 
503 
504 #endif
const std::string & branchTitle() const
#define LogDebug(id)
RunNumber_t run() const
Definition: EventID.h:39
T getParameter(std::string const &) const
EventNumber_t event() const
Definition: EventID.h:41
T getUntrackedParameter(std::string const &, T const &) const
string separator
Definition: mps_merge.py:77
OrphanHandle< PROD > put(std::unique_ptr< PROD > product)
Put a new product.
Definition: Event.h:122
const std::string & branchAlias() const
std::vector< float > * dataHolderPtr()
const std::string branchName() const
value branch(const edm::Event &iEvent)
std::string order_
bool exists(std::string const &parameterName) const
checks if a parameter exists
int bunchCrossing() const
Definition: EventBase.h:66
def replace(string, replacements)
edm::LuminosityBlockNumber_t luminosityBlock() const
Definition: EventBase.h:63
T * make(const Args &...args) const
make new ROOT object
Definition: TFileService.h:64
std::string branchTitle_
std::string maxIndexName_
TreeBranch::value value
StringLeaveHelper(const TreeBranch &B, const edm::Event &iEvent)
const std::string & selection() const
double weight() const
bool isRealData() const
Definition: EventBase.h:64
std::string encode() const
Definition: InputTag.cc:165
std::vector< float > * dataHolderPtr_
const std::string & order() const
std::vector< std::string > getParameterNamesForType(bool trackiness=true) const
Definition: ParameterSet.h:194
std::string * model_params_
U second(std::pair< T, U > const &p)
uint registerleaves(edm::ProducerBase *producer)
int iEvent
Definition: GenABIO.cc:230
const edm::InputTag & src() const
comments_const_iterator comments_begin() const
TypeLabelItem const & produces()
declare what type of product will make and with which optional label
std::map< std::string, std::vector< TreeBranch > > Branches
TreeBranch(std::string C, edm::InputTag S, std::string E, std::string O, std::string SE, std::string Mi, std::string Ba)
void fill(edm::Event &iEvent)
std::string branchAlias_
int orbitNumber() const
Definition: EventBase.h:67
static const std::string B
std::unique_ptr< std::vector< float > > value
std::string expr_
bool getByLabel(InputTag const &tag, Handle< PROD > &result) const
Definition: Event.h:416
TreeBranch::value value
edm::InputTag src_
void assignDataHolderPtr(std::vector< float > *data)
bool failedToGet() const
Definition: HandleBase.h:78
TFile & file() const
return opened TFile
Definition: TFileService.h:37
unsigned long long uint64_t
Definition: Time.h:15
double b
Definition: hdecay.h:120
double S(const TLorentzVector &, const TLorentzVector &)
Definition: Particle.cc:99
std::string selection_
const std::string & className() const
edm::EventID id() const
Definition: EventBase.h:60
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
comments_const_iterator comments_end() const
edm::EventAuxiliary::ExperimentType experimentType() const
Definition: EventBase.h:65
const std::string & maxIndexName() const
std::string class_
size_t getParameterSetNames(std::vector< std::string > &output, bool trackiness=true) const
const std::string & expr() const
StringBranchHelper(const TreeBranch &B, const edm::Event &iEvent)
def move(src, dest)
Definition: eostools.py:510
StringBasedNTupler(const edm::ParameterSet &iConfig)
std::vector< float > ** dataHolderPtrAdress()