CMS 3D CMS Logo

RootOutputTree.cc
Go to the documentation of this file.
1 
2 #include "RootOutputTree.h"
3 
14 
15 #include "TBranch.h"
16 #include "TBranchElement.h"
17 #include "TCollection.h"
18 #include "TFile.h"
19 #include "TTreeCloner.h"
20 #include "Rtypes.h"
21 #include "RVersion.h"
22 
23 #include <limits>
24 
25 #include "tbb/task_arena.h"
26 
27 namespace edm {
28 
42  public:
43  DuplicateTreeSentry(TTree* tree) : tree_(tree) { dup(); }
44 
45  TTree* tree() const { return mytree_ ? mytree_.get() : tree_; }
46 
47  private:
48  DuplicateTreeSentry(DuplicateTreeSentry const&) = delete; // Disallow copying and moving
51  void operator()(TFile* iFile) const {
52  if (iFile) {
53  iFile->Close();
54  }
55  delete iFile;
56  }
57  };
58 
59  void dup() {
61  if (!pSLC.isAvailable()) {
62  return;
63  }
64  if (pSLC->sourceCacheHint() && *(pSLC->sourceCacheHint()) == "lazy-download") {
65  return;
66  }
67  if (!pSLC->sourceCloneCacheHint() || *(pSLC->sourceCloneCacheHint()) != "lazy-download") {
68  return;
69  }
70  edm::LogWarning("DuplicateTreeSentry") << "Re-opening file for fast-cloning";
71 
72  TFile* file = tree_->GetCurrentFile();
73  const TUrl* url = file->GetEndpointUrl();
74  if (!url) {
75  return;
76  }
77  file_.reset(TFile::Open(url->GetUrl(), "READWRAP")); // May throw an exception.
78  if (!file_) {
79  return;
80  }
81  mytree_.reset(dynamic_cast<TTree*>(file_->Get(tree_->GetName())));
82  if (!mytree_) {
83  return;
84  }
85  }
86 
91  std::unique_ptr<TFile, CloseBeforeDelete> file_;
92  TTree* tree_ = nullptr;
93  std::unique_ptr<TTree> mytree_ = nullptr;
94  };
95 
96  RootOutputTree::RootOutputTree(std::shared_ptr<TFile> filePtr,
97  BranchType const& branchType,
98  int splitLevel,
99  int treeMaxVirtualSize)
100  : filePtr_(filePtr),
101  tree_(makeTTree(filePtr.get(), BranchTypeToProductTreeName(branchType), splitLevel)),
102  producedBranches_(),
103  readBranches_(),
104  auxBranches_(),
105  unclonedReadBranches_(),
106  clonedReadBranchNames_(),
107  currentlyFastCloning_(),
108  fastCloneAuxBranches_(false) {
109  if (treeMaxVirtualSize >= 0)
110  tree_->SetMaxVirtualSize(treeMaxVirtualSize);
111  }
112 
113  TTree* RootOutputTree::assignTTree(TFile* filePtr, TTree* tree) {
114  tree->SetDirectory(filePtr);
115  // Turn off autosaving because it is such a memory hog and we are not using
116  // this check-pointing feature anyway.
117  tree->SetAutoSave(std::numeric_limits<Long64_t>::max());
118  return tree;
119  }
120 
121  TTree* RootOutputTree::makeTTree(TFile* filePtr, std::string const& name, int splitLevel) {
122  TTree* tree = new TTree(name.c_str(), "", splitLevel);
123  if (!tree)
124  throw edm::Exception(errors::FatalRootError) << "Failed to create the tree: " << name << "\n";
125  if (tree->IsZombie())
126  throw edm::Exception(errors::FatalRootError) << "Tree: " << name << " is a zombie."
127  << "\n";
128 
129  return assignTTree(filePtr, tree);
130  }
131 
132  bool RootOutputTree::checkSplitLevelsAndBasketSizes(TTree* inputTree) const {
133  assert(inputTree != nullptr);
134 
135  // Do the split level and basket size match in the input and output?
136  for (auto const& outputBranch : readBranches_) {
137  if (outputBranch != nullptr) {
138  TBranch* inputBranch = inputTree->GetBranch(outputBranch->GetName());
139 
140  if (inputBranch != nullptr) {
141  if (inputBranch->GetSplitLevel() != outputBranch->GetSplitLevel() ||
142  inputBranch->GetBasketSize() != outputBranch->GetBasketSize()) {
143  return false;
144  }
145  }
146  }
147  }
148  return true;
149  }
150 
151  namespace {
152  bool checkMatchingBranches(TBranchElement* inputBranch, TBranchElement* outputBranch) {
153  if (inputBranch->GetStreamerType() != outputBranch->GetStreamerType()) {
154  return false;
155  }
156  TObjArray* inputArray = inputBranch->GetListOfBranches();
157  TObjArray* outputArray = outputBranch->GetListOfBranches();
158 
159  if (outputArray->GetSize() < inputArray->GetSize()) {
160  return false;
161  }
162  TIter iter(outputArray);
163  TObject* obj = nullptr;
164  while ((obj = iter.Next()) != nullptr) {
165  TBranchElement* outBranch = dynamic_cast<TBranchElement*>(obj);
166  if (outBranch) {
167  TBranchElement* inBranch = dynamic_cast<TBranchElement*>(inputArray->FindObject(outBranch->GetName()));
168  if (!inBranch) {
169  return false;
170  }
171  if (!checkMatchingBranches(inBranch, outBranch)) {
172  return false;
173  }
174  }
175  }
176  return true;
177  }
178  } // namespace
179 
180  bool RootOutputTree::checkIfFastClonable(TTree* inputTree) const {
181  if (inputTree == nullptr)
182  return false;
183 
184  // Do the sub-branches match in the input and output. Extra sub-branches in the input are OK for fast cloning, but not in the output.
185  for (auto const& outputBr : readBranches_) {
186  TBranchElement* outputBranch = dynamic_cast<TBranchElement*>(outputBr);
187  if (outputBranch != nullptr) {
188  TBranchElement* inputBranch = dynamic_cast<TBranchElement*>(inputTree->GetBranch(outputBranch->GetName()));
189  if (inputBranch != nullptr) {
190  // We have a matching top level branch. Do the recursive check on subbranches.
191  if (!checkMatchingBranches(inputBranch, outputBranch)) {
192  LogInfo("FastCloning") << "Fast Cloning disabled because a data member has been added to split branch: "
193  << inputBranch->GetName() << "\n.";
194  return false;
195  }
196  }
197  }
198  }
199  return true;
200  }
201 
202  bool RootOutputTree::checkEntriesInReadBranches(Long64_t expectedNumberOfEntries) const {
203  for (auto const& readBranch : readBranches_) {
204  if (readBranch->GetEntries() != expectedNumberOfEntries) {
205  return false;
206  }
207  }
208  return true;
209  }
210 
212  if (in->GetEntries() != 0) {
213  TObjArray* branches = tree_->GetListOfBranches();
214  // If any products were produced (not just event products), the EventAuxiliary will be modified.
215  // In that case, don't fast copy auxiliary branches. Remove them, and add back after fast copying.
216  std::map<Int_t, TBranch*> auxIndexes;
217  bool mustRemoveSomeAuxs = false;
218  if (!fastCloneAuxBranches_) {
219  for (auto const& auxBranch : auxBranches_) {
220  int auxIndex = branches->IndexOf(auxBranch);
221  assert(auxIndex >= 0);
222  auxIndexes.insert(std::make_pair(auxIndex, auxBranch));
223  branches->RemoveAt(auxIndex);
224  }
225  mustRemoveSomeAuxs = true;
226  }
227 
228  //Deal with any aux branches which can never be cloned
229  for (auto const& auxBranch : unclonedAuxBranches_) {
230  int auxIndex = branches->IndexOf(auxBranch);
231  assert(auxIndex >= 0);
232  auxIndexes.insert(std::make_pair(auxIndex, auxBranch));
233  branches->RemoveAt(auxIndex);
234  mustRemoveSomeAuxs = true;
235  }
236 
237  if (mustRemoveSomeAuxs) {
238  branches->Compress();
239  }
240 
241  DuplicateTreeSentry dupTree(in);
242  TTreeCloner cloner(
243  dupTree.tree(), tree_, option.c_str(), TTreeCloner::kNoWarnings | TTreeCloner::kIgnoreMissingTopLevel);
244 
245  if (!cloner.IsValid()) {
246  // Let's check why
247  static const char* okerror = "One of the export branch";
248  if (strncmp(cloner.GetWarning(), okerror, strlen(okerror)) == 0) {
249  // That's fine we will handle it;
250  } else {
251  throw edm::Exception(errors::FatalRootError) << "invalid TTreeCloner (" << cloner.GetWarning() << ")\n";
252  }
253  }
254  tree_->SetEntries(tree_->GetEntries() + in->GetEntries());
255  Service<RootHandlers> rootHandler;
256  rootHandler->ignoreWarningsWhileDoing([&cloner] { cloner.Exec(); });
257 
258  if (mustRemoveSomeAuxs) {
259  for (auto const& auxIndex : auxIndexes) {
260  // Add the auxiliary branches back after fast copying the rest of the tree.
261  Int_t last = branches->GetLast();
262  if (last >= 0) {
263  branches->AddAtAndExpand(branches->At(last), last + 1);
264  for (Int_t ind = last - 1; ind >= auxIndex.first; --ind) {
265  branches->AddAt(branches->At(ind), ind + 1);
266  };
267  branches->AddAt(auxIndex.second, auxIndex.first);
268  } else {
269  branches->Add(auxIndex.second);
270  }
271  }
272  }
273  }
274  }
275 
277  if (tree->GetNbranches() != 0) {
278  // This is required when Fill is called on individual branches
279  // in the TTree instead of calling Fill once for the entire TTree.
280  tree->SetEntries(-1);
281  }
282  setRefCoreStreamer(true);
283  tree->AutoSave("FlushBaskets");
284  }
285 
286  void RootOutputTree::fillTTree(std::vector<TBranch*> const& branches) {
287  for_all(branches, std::bind(&TBranch::Fill, std::placeholders::_1));
288  }
289 
291 
292  void RootOutputTree::maybeFastCloneTree(bool canFastClone,
293  bool canFastCloneAux,
294  TTree* tree,
295  std::string const& option) {
296  unclonedReadBranches_.clear();
297  clonedReadBranchNames_.clear();
298  currentlyFastCloning_ = canFastClone && !readBranches_.empty();
299  if (currentlyFastCloning_) {
300  fastCloneAuxBranches_ = canFastCloneAux;
301  fastCloneTTree(tree, option);
302  for (auto const& branch : readBranches_) {
303  if (branch->GetEntries() == tree_->GetEntries()) {
304  clonedReadBranchNames_.insert(std::string(branch->GetName()));
305  } else {
306  unclonedReadBranches_.push_back(branch);
307  }
308  }
309  Service<JobReport> reportSvc;
310  reportSvc->reportFastClonedBranches(clonedReadBranchNames_, tree_->GetEntries());
311  }
312  }
313 
315  if (currentlyFastCloning_) {
321  } else {
322  // Isolate the fill operation so that IMT doesn't grab other large tasks
323  // that could lead to PoolOutputModule stalling
324  tbb::this_task_arena::isolate([&] { tree_->Fill(); });
325  }
326  }
327 
329  std::string const& className,
330  void const*& pProd,
331  int splitLevel,
332  int basketSize,
333  bool produced) {
334  assert(splitLevel != BranchDescription::invalidSplitLevel);
335  assert(basketSize != BranchDescription::invalidBasketSize);
336  TBranch* branch = tree_->Branch(branchName.c_str(), className.c_str(), &pProd, basketSize, splitLevel);
337  assert(branch != nullptr);
338  /*
339  if(pProd != nullptr) {
340  // Delete the product that ROOT has allocated.
341  WrapperBase const* edp = static_cast<WrapperBase const *>(pProd);
342  delete edp;
343  pProd = nullptr;
344  }
345 */
346  if (produced) {
347  producedBranches_.push_back(branch);
348  } else {
349  readBranches_.push_back(branch);
350  }
351  }
352 
354  // The TFile was just closed.
355  // Just to play it safe, zero all pointers to quantities in the file.
356  auxBranches_.clear();
357  unclonedAuxBranches_.clear();
358  producedBranches_.clear();
359  readBranches_.clear();
360  unclonedReadBranches_.clear();
361  tree_ = nullptr; // propagate_const<T> has no reset() function
362  filePtr_ = nullptr; // propagate_const<T> has no reset() function
363  }
364 } // namespace edm
virtual std::string const * sourceCacheHint() const =0
std::set< std::string > clonedReadBranchNames_
edm::propagate_const< TTree * > tree_
static int const invalidSplitLevel
DuplicateTreeSentry(TTree *tree)
static int const invalidBasketSize
static void fillTTree(std::vector< TBranch * > const &branches)
bool checkSplitLevelsAndBasketSizes(TTree *inputTree) const
TTree const * tree() const
void setRefCoreStreamer(bool resetAll=false)
std::vector< TBranch * > unclonedAuxBranches_
edm::propagate_const< std::shared_ptr< TFile > > filePtr_
bool checkIfFastClonable(TTree *inputTree) const
BranchType
Definition: BranchType.h:11
std::vector< TBranch * > producedBranches_
std::vector< TBranch * > auxBranches_
Func for_all(ForwardSequence &s, Func f)
wrapper for std::for_each
Definition: Algorithms.h:14
std::unique_ptr< TTree > mytree_
RootOutputTree(std::shared_ptr< TFile > filePtr, BranchType const &branchType, int splitLevel, int treeMaxVirtualSize)
static TTree * assignTTree(TFile *file, TTree *tree)
void addBranch(std::string const &branchName, std::string const &className, void const *&pProd, int splitLevel, int basketSize, bool produced)
void Fill(HcalDetId &id, double val, std::vector< TH2F > &depth)
bool isAvailable() const
Definition: Service.h:40
virtual std::string const * sourceCloneCacheHint() const =0
std::vector< TBranch * > unclonedReadBranches_
bool checkEntriesInReadBranches(Long64_t expectedNumberOfEntries) const
std::string const & BranchTypeToProductTreeName(BranchType const &branchType)
Definition: BranchType.cc:104
static TTree * makeTTree(TFile *filePtr, std::string const &name, int splitLevel)
T const & get(Event const &event, InputTag const &tag)(false)
Definition: Event.h:658
DuplicateTreeSentry & operator=(DuplicateTreeSentry const &)=delete
static void writeTTree(TTree *tree)
std::vector< TBranch * > readBranches_
HLT enums.
Definition: tree.py:1
void maybeFastCloneTree(bool canFastClone, bool canFastCloneAux, TTree *tree, std::string const &option)
def branchType(schema, name)
Definition: revisionDML.py:114
std::string className(const T &t)
Definition: ClassName.h:30
void fastCloneTTree(TTree *in, std::string const &option)
std::unique_ptr< TFile, CloseBeforeDelete > file_