CMS 3D CMS Logo

RootPrimaryFileSequence.cc
Go to the documentation of this file.
1 /*----------------------------------------------------------------------
2 ----------------------------------------------------------------------*/
3 #include "DuplicateChecker.h"
4 #include "InputFile.h"
5 #include "PoolSource.h"
6 #include "RootFile.h"
8 #include "RootTree.h"
9 
19 
20 namespace edm {
// Constructor: member initializers followed by the constructor body.
// "skipEvents", "noEventSort" and "enforceGUIDInFileName" are read as untracked
// parameters of pset; "cacheSize" is read only when noEventSort_ is true,
// otherwise the tree cache size starts at 0.
// NOTE(review): the constructor signature and several body lines are absent
// from this extract (the listing numbers skip).
24  : RootInputFileSequence(pset, catalog),
25  input_(input),
26  firstFile_(true),
27  branchesMustMatch_(BranchDescription::Permissive),
28  orderedProcessHistoryIDs_(),
29  eventSkipperByID_(EventSkipperByID::create(pset).release()),
30  initialNumberOfEventsToSkip_(pset.getUntrackedParameter<unsigned int>("skipEvents")),
31  noEventSort_(pset.getUntrackedParameter<bool>("noEventSort")),
32  treeCacheSize_(noEventSort_ ? pset.getUntrackedParameter<unsigned int>("cacheSize") : 0U),
33  duplicateChecker_(new DuplicateChecker(pset)),
34  usingGoToEvent_(false),
35  enablePrefetching_(false),
36  enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName")) {
37  // The SiteLocalConfig controls the TTreeCache size and the prefetching settings.
39  if (pSLC.isAvailable()) {
// The site value replaces the cache size only when the cache is already
// non-zero, so a cache size of 0 set above is left at 0.
40  if (treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) {
41  treeCacheSize_ = *(pSLC->sourceTTreeCacheSize());
42  }
43  enablePrefetching_ = pSLC->enablePrefetching();
44  }
45 
// "branchesMustMatch" defaults to "permissive" when the parameter is absent.
46  std::string branchesMustMatch =
47  pset.getUntrackedParameter<std::string>("branchesMustMatch", std::string("permissive"));
48  if (branchesMustMatch == std::string("strict"))
// NOTE(review): the statement controlled by the 'if' above (listing line 49)
// is not present in this extract.
50 
51  // Prestage the files
54  }
55  // Open the first file.
// The enclosing loop header is not visible here; the loop is left as soon as
// a file has been opened successfully.
58  if (rootFile())
59  break;
60  }
// When a first file is open, its product list is fed into the input source's
// product registry.
61  if (rootFile()) {
62  input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList());
65  }
66  }
67  }
68 
70 
72 
// Produces the FileBlock for the file that is about to be processed.
// On the first call (firstFile_ still true) the flag is cleared and the file
// opened earlier is used; on later calls nextFile() advances to the next file.
// Returns a default-constructed FileBlock when no root file is open, otherwise
// the block created by the open file.
73  std::unique_ptr<FileBlock> RootPrimaryFileSequence::readFile_() {
74  if (firstFile_) {
75  // The first input file has already been opened.
76  firstFile_ = false;
77  if (!rootFile()) {
// NOTE(review): the statement for this branch (listing line 78) is not
// present in this extract.
79  }
80  } else {
81  if (!nextFile()) {
// NOTE(review): assert() is compiled out when NDEBUG is defined; in that case
// a false return from nextFile() falls through to the rootFile() check below.
82  assert(0);
83  }
84  }
85  if (!rootFile()) {
86  return std::make_unique<FileBlock>();
87  }
88  return rootFile()->createFileBlock();
89  }
90 
92  // close the currently open file, if any, and delete the RootFile object.
// Nothing happens when no file is open.
// NOTE(review): the function signature and listing line 96 are not present in
// this extract.
93  if (rootFile()) {
// The sentry is built from the input source, the logical file name and the
// fallback flag, and lives until the end of this 'if' block.
94  auto sentry = std::make_unique<InputSource::FileCloseSentry>(input_, lfn(), usedFallback());
95  rootFile()->close();
// duplicateChecker_ is dereferenced here without a null check.
97  duplicateChecker_->inputFileClosed();
98  rootFile().reset();
99  }
100  }
101 
// Opens the current primary file by forwarding to initTheFile() with the
// input type name "primaryFiles" and InputType::Primary.
// NOTE(review): the function signature is not present in this extract;
// skipBadFiles is presumably its parameter.
103  // If we are not duplicate checking across files and we are not using random access to find events,
104  // then we can delete the IndexIntoFile for the file we are closing.
105  // If we can't delete all of it, then we can delete the parts we do not need.
// deleteIndexIntoFile is false when goToEvent has been used, or when a
// duplicate checker exists, checks all files and is not disabled.
106  bool deleteIndexIntoFile = !usingGoToEvent_ && !(duplicateChecker_ && duplicateChecker_->checkingAllFiles() &&
107  !duplicateChecker_->checkDisabled());
108  initTheFile(skipBadFiles, deleteIndexIntoFile, &input_, "primaryFiles", InputType::Primary);
109  }
110 
// Builds a shared RootFile for the file at the current sequence position.
// NOTE(review): the function signature and most constructor arguments are not
// present in this extract (the listing numbers skip), so the argument list
// below is incomplete and must not be read as the full call.
112  size_t currentIndexIntoFile = sequenceNumberOfFile();
113  return std::make_shared<RootFile>(fileName(),
115  logicalFileName(),
116  filePtr,
119  remainingEvents(),
121  input_.nStreams(),
125  input_.runHelper(),
126  noEventSort_,
131  nullptr, // associationsFromSecondary
136  currentIndexIntoFile,
143  }
144 
// Moves to the next file in the sequence.
// Returns false when there are no more files, true otherwise.
// Throws Exception(errors::MismatchedInputFiles) when the merge information
// string is non-empty.
// NOTE(review): the function signature and listing lines 152 and 157 are not
// present in this extract.
146  if (!noMoreFiles())
147  setAtNextFile();
// Checked again because setAtNextFile() may have moved past the last file.
148  if (noMoreFiles()) {
149  return false;
150  }
151 
153 
154  if (rootFile()) {
155  // make sure the new product registry is compatible with the main one
156  std::string mergeInfo =
158  if (!mergeInfo.empty()) {
159  throw Exception(errors::MismatchedInputFiles, "RootPrimaryFileSequence::nextFile()") << mergeInfo;
160  }
161  }
162  return true;
163  }
164 
// Moves to the previous file in the sequence and positions it at its last entry.
// Returns false when already at the first file, true otherwise.
// Throws Exception(errors::MismatchedInputFiles) when the merge information
// string is non-empty.
// NOTE(review): the function signature and listing lines 169 and 176 are not
// present in this extract.
166  if (atFirstFile()) {
167  return false;
168  }
170 
171  initFile(false);
172 
173  if (rootFile()) {
174  // make sure the new product registry is compatible with the main one
175  std::string mergeInfo =
177  if (!mergeInfo.empty()) {
// NOTE(review): the exception text names previousEvent(); this looks like it
// should name the enclosing function (presumably previousFile()) - confirm.
178  throw Exception(errors::MismatchedInputFiles, "RootPrimaryFileSequence::previousEvent()") << mergeInfo;
179  }
180  }
181  if (rootFile())
182  rootFile()->setToLastEntry();
183  return true;
184  }
185 
188  EventNumber_t& event) {
// Reports what the input source should deliver next.
// NOTE(review): the first line(s) of the signature are not present in this
// extract; run and lumi are presumably the other reference parameters.
// Order of checks: no more files -> IsStop; first file not yet read -> IsFile;
// otherwise the open file is asked for its next entry type.
189  if (noMoreFiles()) {
190  return InputSource::IsStop;
191  }
192  if (firstFile_) {
193  return InputSource::IsFile;
194  }
195  if (rootFile()) {
196  IndexIntoFile::EntryType entryType = rootFile()->getNextItemType(run, lumi, event);
197  if (entryType == IndexIntoFile::kEvent) {
198  return InputSource::IsEvent;
199  } else if (entryType == IndexIntoFile::kLumi) {
200  return InputSource::IsLumi;
201  } else if (entryType == IndexIntoFile::kRun) {
202  return InputSource::IsRun;
203  }
// Any other entry type is expected to be kEnd; the assert is compiled out
// when NDEBUG is defined.
204  assert(entryType == IndexIntoFile::kEnd);
205  }
// The current file is exhausted (or none is open): stop at the last file,
// otherwise ask for the next file.
206  if (atLastFile()) {
207  return InputSource::IsStop;
208  }
209  return InputSource::IsFile;
210  }
211 
212  // Rewind to before the first event that was read.
// NOTE(review): the function signature and listing line 225 are not present
// in this extract.
// If positioned past the first file, the current file is closed and the
// sequence is reset to the first file.
214  if (!atFirstFile()) {
215  closeFile_();
216  setAtFirstFile();
217  }
// Open the first file if none is open at this point.
218  if (!rootFile()) {
219  initFile(false);
220  }
221  rewindFile();
// Setting firstFile_ makes the sequence report a file transition again.
222  firstFile_ = true;
223  if (rootFile()) {
224  if (initialNumberOfEventsToSkip_ != 0) {
// NOTE(review): the statement for this branch (listing line 225) is missing;
// presumably it repeats the initial event skip - confirm against the original file.
226  }
227  }
228  }
229 
230  // Rewind to the beginning of the current file
// Does nothing when no file is open.
// NOTE(review): the function signature (listing line 231) is not present in
// this extract.
232  if (rootFile())
233  rootFile()->rewind();
234  }
235 
236  // Advance "offset" events. Offset can be positive or negative (or zero).
// Returns true once offset has reached 0, false when the sequence runs out of
// files in the direction of travel.
// NOTE(review): the function signature (listing line 237) is not present in
// this extract. The loop only terminates if rootFile()->skipEvents() updates
// offset, so offset is presumably taken by reference - confirm.
238  assert(rootFile());
239  while (offset != 0) {
240  bool atEnd = rootFile()->skipEvents(offset);
// Events still to skip forward, or the file reported its end: go to the next file.
241  if ((offset > 0 || atEnd) && !nextFile()) {
242  return false;
243  }
// Events still to skip backward: go to the previous file, and mark the
// sequence as having no more files when there is none.
244  if (offset < 0 && !previousFile()) {
245  setNoMoreFiles();
246  return false;
247  }
248  }
249  return true;
250  }
251 
// Positions the sequence at the event identified by eventID.
// Returns true when the event was found, false otherwise (including when no
// file is open). Search order: the open file, then files whose index is
// already available, then files not yet opened.
// NOTE(review): the function signature and listing lines 272 and 284 are not
// present in this extract.
// Recording that goToEvent was used makes later file opens keep their
// IndexIntoFile (usingGoToEvent_ feeds the deleteIndexIntoFile computation).
253  usingGoToEvent_ = true;
254  if (rootFile()) {
255  if (rootFile()->goToEvent(eventID)) {
256  return true;
257  }
258  // If only one input file, give up now, to save time.
259  if (rootFile() && indexesIntoFiles().size() == 1) {
260  return false;
261  }
262  // Save the current file and position so that we can restore them
263  // if we fail to find the desired event
264  bool closedOriginalFile = false;
265  size_t const originalFileSequenceNumber = sequenceNumberOfFile();
266  IndexIntoFile::IndexIntoFileItr originalPosition = rootFile()->indexIntoFileIter();
267 
268  // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files.
269  for (auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) {
270  if (*it && (*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) {
271  // We found it. Close the currently open file, and open the correct one.
273  initFile(false);
274  // Now get the item from the correct file.
275  assert(rootFile());
276  bool found = rootFile()->goToEvent(eventID);
// NOTE(review): with NDEBUG defined the assert is compiled out, so true is
// returned even if goToEvent() failed.
277  assert(found);
278  return true;
279  }
280  }
281  // Look for item in files not yet opened.
282  for (auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) {
283  if (!*it) {
285  initFile(false);
286  closedOriginalFile = true;
// NOTE(review): *it was null at the check above; the dereference below relies
// on the file open (and the missing listing line 284) filling in this entry - confirm.
287  if ((*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) {
288  assert(rootFile());
289  if (rootFile()->goToEvent(eventID)) {
290  return true;
291  }
292  }
293  }
294  }
// Not found anywhere: reopen the original file and restore the saved position.
295  if (closedOriginalFile) {
296  setAtFileSequenceNumber(originalFileSequenceNumber);
297  initFile(false);
298  assert(rootFile());
299  rootFile()->setPosition(originalPosition);
300  }
301  }
302  return false;
303  }
304 
306 
308 
// Registers the untracked parameters of this class on desc, each with its
// default value and a comment: skipEvents (0), noEventSort (true),
// cacheSize (roottree::defaultCacheSize), branchesMustMatch ("permissive")
// and enforceGUIDInFileName (false).
// NOTE(review): the function signature and listing lines 332-333 are not
// present in this extract.
310  desc.addUntracked<unsigned int>("skipEvents", 0U)
311  ->setComment("Skip the first 'skipEvents' events that otherwise would have been processed.");
312  desc.addUntracked<bool>("noEventSort", true)
313  ->setComment(
314  "True: Process runs, lumis and events in the order they appear in the file (but see notes 1 and 2).\n"
315  "False: Process runs, lumis and events in each file in numerical order (run#, lumi#, event#) (but see note "
316  "3).\n"
317  "Note 1: Events within the same lumi will always be processed contiguously.\n"
318  "Note 2: Lumis within the same run will always be processed contiguously.\n"
319  "Note 3: Any sorting occurs independently in each input file (no sorting across input files).");
320  desc.addUntracked<unsigned int>("cacheSize", roottree::defaultCacheSize)
321  ->setComment("Size of ROOT TTree prefetch cache. Affects performance.");
322  std::string defaultString("permissive");
323  desc.addUntracked<std::string>("branchesMustMatch", defaultString)
324  ->setComment(
325  "'strict': Branches in each input file must match those in the first file.\n"
326  "'permissive': Branches in each input file may be any subset of those in the first file.");
327  desc.addUntracked<bool>("enforceGUIDInFileName", false)
328  ->setComment(
329  "True: file name part is required to be equal to the GUID of the file\n"
330  "False: file name can be anything");
331 
334  }
335 
// NOTE(review): the function signature and every return statement (listing
// lines 336, 339, 342, 344, 347) are not present in this extract. Presumably
// this is forwardState(), reporting whether later events exist - confirm
// against the original file.
// Visible decision structure: with a file open, first test whether the last
// event of the file was just read, then whether this is the final file.
337  if (rootFile()) {
338  if (!rootFile()->wasLastEventJustRead()) {
340  }
341  if (noMoreFiles() || atLastFile()) {
343  } else {
345  }
346  }
348  }
349 
// NOTE(review): the function signature and every return statement (listing
// lines 350, 353, 356, 358, 360) are not present in this extract. Presumably
// this is reverseState(), reporting whether earlier events exist - confirm
// against the original file.
// Visible decision structure: with a file open, first test whether the first
// event of the file was just read, then whether an earlier file exists.
351  if (rootFile()) {
352  if (!rootFile()->wasFirstEventJustRead()) {
354  }
355  if (!atFirstFile()) {
357  }
359  }
361  }
362 
363 } // namespace edm
RunNumber_t run() const
Definition: EventID.h:39
size
Write out results.
EventNumber_t event() const
Definition: EventID.h:41
T getUntrackedParameter(std::string const &, T const &) const
std::string const & logicalFileName() const
void initFile(bool skipBadFiles)
ProductRegistry & productRegistryUpdate()
Definition: InputSource.h:326
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
std::string const & fileName() const
InputSource::ItemType getNextItemType(RunNumber_t &run, LuminosityBlockNumber_t &lumi, EventNumber_t &event)
void setAtFileSequenceNumber(size_t offset)
def create(alignables, pedeDump, additionalData, outputFile, config)
void initFile_(bool skipBadFiles) override
ProcessingController::ForwardState forwardState() const
unsigned long long EventNumber_t
std::vector< std::shared_ptr< IndexIntoFile > > const & indexesIntoFiles() const
ProcessingMode processingMode() const
RunsLumisAndEvents (default), RunsAndLumis, or Runs.
Definition: InputSource.h:233
static void fillDescription(ParameterSetDescription &desc)
static void fillDescription(ParameterSetDescription &desc)
std::unique_ptr< FileBlock > readFile_()
LuminosityBlockNumber_t luminosityBlock() const
Definition: EventID.h:40
unsigned int const defaultCacheSize
Definition: RootTree.h:46
unsigned int LuminosityBlockNumber_t
static std::string const input
Definition: EdmProvDump.cc:48
RunHelperBase * runHelper()
Definition: PoolSource.h:47
ProcessingController::ReverseState reverseState() const
std::shared_ptr< RootFile > RootFileSharedPtr
bool goToEvent(EventID const &eventID)
static const StorageFactory * get(void)
int remainingEvents() const
Definition: InputSource.h:187
void stagein(const std::string &url) const
std::string const & lfn() const
RootFileSharedPtr makeRootFile(std::shared_ptr< InputFile > filePtr) override
void initTheFile(bool skipBadFiles, bool deleteIndexIntoFile, InputSource *input, char const *inputTypeName, InputType inputType)
std::vector< ProcessHistoryID > orderedProcessHistoryIDs_
bool isAvailable() const
Definition: Service.h:40
RootPrimaryFileSequence(ParameterSet const &pset, PoolSource &input, InputFileCatalog const &catalog)
std::string merge(ProductRegistry const &other, std::string const &fileName, BranchDescription::MatchMode branchesMustMatch=BranchDescription::Permissive)
unsigned int nStreams() const
Definition: PoolSource.h:44
bool dropDescendants() const
Definition: PoolSource.h:41
bool bypassVersionCheck() const
Definition: PoolSource.h:42
#define end
Definition: vmac.h:39
std::shared_ptr< RootFile const > rootFile() const
int treeMaxVirtualSize() const
Definition: PoolSource.h:45
bool skipBadFiles() const
Definition: PoolSource.h:40
std::shared_ptr< EventSkipperByID const > eventSkipperByID() const
std::shared_ptr< BranchIDListHelper const > branchIDListHelper() const
Accessors for branchIDListHelper.
Definition: InputSource.h:159
bool labelRawDataLikeMC() const
Definition: PoolSource.h:43
edm::propagate_const< std::shared_ptr< DuplicateChecker > > duplicateChecker_
ProcessHistoryRegistry & processHistoryRegistryForUpdate()
Definition: InputSource.h:327
BranchDescription::MatchMode branchesMustMatch_
static void fillDescription(ParameterSetDescription &desc)
#define begin
Definition: vmac.h:32
HLT enums.
std::shared_ptr< ThinnedAssociationsHelper const > thinnedAssociationsHelper() const
Accessors for thinnedAssociationsHelper.
Definition: InputSource.h:165
void updateFromInput(ProductList const &other)
int remainingLuminosityBlocks() const
Definition: InputSource.h:195
unsigned int RunNumber_t
ProcessConfiguration const & processConfiguration() const
Accessor for Process Configuration.
Definition: InputSource.h:201
std::shared_ptr< DuplicateChecker const > duplicateChecker() const
Definition: event.py:1
ProductSelectorRules const & productSelectorRules() const
Definition: PoolSource.h:46