CMS 3D CMS Logo

RootEmbeddedFileSequence.cc
Go to the documentation of this file.
1 /*----------------------------------------------------------------------
2 ----------------------------------------------------------------------*/
3 #include "EmbeddedRootSource.h"
4 #include "InputFile.h"
5 #include "RootFile.h"
7 #include "RootTree.h"
8 
18 
19 #include "CLHEP/Random/RandFlat.h"
20 
21 #include <random>
22 
23 namespace edm {
24  class EventPrincipal;
25 
30  input_(input),
31  orderedProcessHistoryIDs_(),
32  sequential_(pset.getUntrackedParameter<bool>("sequential", false)),
33  sameLumiBlock_(pset.getUntrackedParameter<bool>("sameLumiBlock", false)),
34  fptr_(nullptr),
35  eventsRemainingInFile_(0),
36  // The default value provided as the second argument to the getUntrackedParameter function call
37  // is not used when the ParameterSet has been validated and the parameters are not optional
38  // in the description. This is currently true when PoolSource is the primary input source.
39  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
40  // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource
41  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
42  // and should be deleted from the code.
43  initialNumberOfEventsToSkip_(pset.getUntrackedParameter<unsigned int>("skipEvents", 0U)),
44  treeCacheSize_(pset.getUntrackedParameter<unsigned int>("cacheSize", roottree::defaultCacheSize)),
45  enablePrefetching_(false),
46  enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName", false)) {
47  if (noFiles()) {
49  << "RootEmbeddedFileSequence no input files specified for secondary input source.\n";
50  }
51  //
52  // The SiteLocalConfig controls the TTreeCache size and the prefetching settings.
54  if (pSLC.isAvailable()) {
55  if (treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) {
56  treeCacheSize_ = *(pSLC->sourceTTreeCacheSize());
57  }
58  enablePrefetching_ = pSLC->enablePrefetching();
59  }
60 
61  // Set the pointer to the function that reads an event.
62  if (sameLumiBlock_) {
63  if (sequential_) {
65  } else {
67  }
68  } else {
69  if (sequential_) {
71  } else {
73  }
74  }
75 
76  // For the secondary input source we do not stage in.
77  if (sequential_) {
78  // We open the first file
79  if (!atFirstFile()) {
81  initFile(false);
82  }
83  assert(rootFile());
84  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
85  if (!sameLumiBlock_) {
87  }
88  } else {
89  // We randomly choose the first file to open.
90  // We cannot use the random number service yet.
91  std::ifstream f("/dev/urandom");
92  unsigned int seed;
93  f.read(reinterpret_cast<char*>(&seed), sizeof(seed));
94  std::default_random_engine dre(seed);
95  size_t count = numberOfFiles();
96  std::uniform_int_distribution<int> distribution(0, count - 1);
97  while (!rootFile() && count != 0) {
98  --count;
99  int offset = distribution(dre);
102  }
103  }
104  if (rootFile()) {
105  input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList());
106  }
107  }
108 
110 
112 
114  // delete the RootFile object.
115  if (rootFile()) {
116  rootFile().reset();
117  }
118  }
119 
121  initTheFile(skipBadFiles, false, nullptr, "mixingFiles", InputType::SecondarySource);
122  }
123 
125  std::shared_ptr<InputFile> filePtr) {
126  size_t currentIndexIntoFile = sequenceNumberOfFile();
127  return std::make_shared<RootFile>(fileNames()[0],
129  logicalFileName(),
130  filePtr,
131  input_.nStreams(),
134  input_.runHelper(),
139  currentIndexIntoFile,
144  }
145 
147  // offset is decremented by the number of events actually skipped.
148  bool completed = rootFile()->skipEntries(offset);
149  while (!completed) {
150  setAtNextFile();
151  if (noMoreFiles()) {
152  setAtFirstFile();
153  }
154  initFile(false);
155  assert(rootFile());
156  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
157  completed = rootFile()->skipEntries(offset);
158  }
159  }
160 
162  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const*, bool recycleFiles) {
163  assert(rootFile());
164  rootFile()->nextEventEntry();
165  bool found = rootFile()->readCurrentEvent(cache);
166  if (!found) {
167  setAtNextFile();
168  if (noMoreFiles()) {
169  if (recycleFiles) {
170  setAtFirstFile();
171  } else {
172  return false;
173  }
174  }
175  initFile(false);
176  assert(rootFile());
177  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
178  return readOneSequential(cache, fileNameHash, nullptr, nullptr, recycleFiles);
179  }
180  fileNameHash = lfnHash();
181  return true;
182  }
183 
185  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const* idp, bool recycleFiles) {
186  assert(idp);
187  EventID const& id = *idp;
190  if (offset > 0) {
191  assert(rootFile());
192  while (offset > 0) {
193  bool found = readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles);
194  if (!found) {
195  return false;
196  }
197  --offset;
198  }
199  }
200  assert(rootFile());
201  if (noMoreFiles() || rootFile()->indexIntoFileIter().run() != id.run() ||
202  rootFile()->indexIntoFileIter().lumi() != id.luminosityBlock()) {
203  bool found = skipToItem(id.run(), id.luminosityBlock(), 0, 0, false);
204  if (!found) {
205  return false;
206  }
207  }
208  assert(rootFile());
209  bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
210  if (found) {
211  found = rootFile()->readCurrentEvent(cache);
212  }
213  if (!found) {
214  found = skipToItemInNewFile(id.run(), id.luminosityBlock(), 0);
215  if (!found) {
216  return false;
217  }
218  return readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles);
219  }
220  fileNameHash = lfnHash();
221  return true;
222  }
223 
225  size_t& fileNameHash,
227  EventID const& id = idx.eventID();
228  bool found = skipToItem(id.run(), id.luminosityBlock(), id.event(), idx.fileNameHash());
229  if (!found) {
230  throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneSpecified(): Secondary Input files"
231  << " do not contain specified event:\n"
232  << id << "\n";
233  }
234  assert(rootFile());
235  found = rootFile()->readCurrentEvent(cache);
236  assert(found);
237  fileNameHash = idx.fileNameHash();
238  if (fileNameHash == 0U) {
239  fileNameHash = lfnHash();
240  }
241  }
242 
244  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const*, bool) {
245  assert(rootFile());
246  assert(engine);
247  unsigned int currentSeqNumber = sequenceNumberOfFile();
248  while (eventsRemainingInFile_ == 0) {
249  unsigned int newSeqNumber = CLHEP::RandFlat::shootInt(engine, fileCatalogItems().size());
250  setAtFileSequenceNumber(newSeqNumber);
251  if (newSeqNumber != currentSeqNumber) {
252  initFile(false);
253  currentSeqNumber = newSeqNumber;
254  }
255  eventsRemainingInFile_ = rootFile()->eventTree().entries();
256  if (eventsRemainingInFile_ == 0) {
257  throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file "
258  << fileNames()[0] << " contains no events.\n";
259  }
260  rootFile()->setAtEventEntry(CLHEP::RandFlat::shootInt(engine, eventsRemainingInFile_) - 1);
261  }
262  rootFile()->nextEventEntry();
263 
264  bool found = rootFile()->readCurrentEvent(cache);
265  if (!found) {
266  rootFile()->setAtEventEntry(0);
267  found = rootFile()->readCurrentEvent(cache);
268  assert(found);
269  }
270  fileNameHash = lfnHash();
272  return true;
273  }
274 
276  size_t& fileNameHash,
277  CLHEP::HepRandomEngine* engine,
278  EventID const* idp,
279  bool recycleFiles) {
280  assert(engine);
281  assert(idp);
282  EventID const& id = *idp;
283  if (noMoreFiles() || !rootFile() || rootFile()->indexIntoFileIter().run() != id.run() ||
284  rootFile()->indexIntoFileIter().lumi() != id.luminosityBlock()) {
285  bool found = skipToItem(id.run(), id.luminosityBlock(), 0);
286  if (!found) {
287  return false;
288  }
289  int eventsInLumi = 0;
290  assert(rootFile());
291  while (rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock()))
292  ++eventsInLumi;
293  found = skipToItem(id.run(), id.luminosityBlock(), 0);
294  assert(found);
295  int eventInLumi = CLHEP::RandFlat::shootInt(engine, eventsInLumi);
296  for (int i = 0; i < eventInLumi; ++i) {
297  bool foundEventInLumi = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
298  assert(foundEventInLumi);
299  }
300  }
301  assert(rootFile());
302  bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
303  if (found) {
304  found = rootFile()->readCurrentEvent(cache);
305  }
306  if (!found) {
307  found = rootFile()->setEntryAtItem(id.run(), id.luminosityBlock(), 0);
308  if (!found) {
309  return false;
310  }
311  return readOneRandomWithID(cache, fileNameHash, engine, idp, recycleFiles);
312  }
313  fileNameHash = lfnHash();
314  return true;
315  }
316 
318  size_t& fileNameHash,
319  CLHEP::HepRandomEngine* engine,
320  EventID const* id,
321  bool recycleFiles) {
322  assert(!sameLumiBlock_ || id != nullptr);
323  assert(sequential_ || engine != nullptr);
324  return (this->*fptr_)(cache, fileNameHash, engine, id, recycleFiles);
325  }
326 
328  desc.addUntracked<bool>("sequential", false)
329  ->setComment(
330  "True: loopEvents() reads events sequentially from beginning of first file.\n"
331  "False: loopEvents() first reads events beginning at random event. New files also chosen randomly");
332  desc.addUntracked<bool>("sameLumiBlock", false)
333  ->setComment(
334  "True: loopEvents() reads events only in same lumi as the specified event.\n"
335  "False: loopEvents() reads events regardless of lumi.");
336  desc.addUntracked<unsigned int>("skipEvents", 0U)
337  ->setComment(
338  "Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False");
339  desc.addUntracked<unsigned int>("cacheSize", roottree::defaultCacheSize)
340  ->setComment("Size of ROOT TTree prefetch cache. Affects performance.");
341  desc.addUntracked<bool>("enforceGUIDInFileName", false)
342  ->setComment(
343  "True: file name part is required to be equal to the GUID of the file\n"
344  "False: file name can be anything");
345  }
346 } // namespace edm
edm::RootInputFileSequence::noMoreFiles
bool noMoreFiles() const
Definition: RootInputFileSequence.h:65
edm::RootEmbeddedFileSequence::initFile_
void initFile_(bool skipBadFiles) override
Definition: RootEmbeddedFileSequence.cc:120
ThinnedAssociationsHelper.h
electrons_cff.bool
bool
Definition: electrons_cff.py:393
mps_fire.i
i
Definition: mps_fire.py:428
input
static const std::string input
Definition: EdmProvDump.cc:48
edm::RootInputFileSequence::setAtFirstFile
void setAtFirstFile()
Definition: RootInputFileSequence.h:70
funct::false
false
Definition: Factorize.h:29
edm::EmbeddedRootSource::productSelectorRules
ProductSelectorRules const & productSelectorRules() const
Definition: EmbeddedRootSource.h:46
edm::RootInputFileSequence::setAtFileSequenceNumber
void setAtFileSequenceNumber(size_t offset)
Definition: RootInputFileSequence.h:71
getEcalConditions_orcoffint2r_cff.catalog
catalog
Definition: getEcalConditions_orcoffint2r_cff.py:40
edm::RootEmbeddedFileSequence::skipEntries
void skipEntries(unsigned int offset)
Definition: RootEmbeddedFileSequence.cc:146
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
EmbeddedRootSource.h
BranchIDListHelper.h
edm::RootEmbeddedFileSequence::eventsRemainingInFile_
int eventsRemainingInFile_
Definition: RootEmbeddedFileSequence.h:69
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::RootInputFileSequence::atFirstFile
bool atFirstFile() const
Definition: RootInputFileSequence.h:63
edm::RootEmbeddedFileSequence::~RootEmbeddedFileSequence
~RootEmbeddedFileSequence() override
Definition: RootEmbeddedFileSequence.cc:109
edm::EmbeddedRootSource::processHistoryRegistryForUpdate
ProcessHistoryRegistry & processHistoryRegistryForUpdate()
Definition: VectorInputSource.h:57
BranchID.h
edm::RootEmbeddedFileSequence::sameLumiBlock_
bool sameLumiBlock_
Definition: RootEmbeddedFileSequence.h:67
edm::IndexIntoFile::invalidEntry
static constexpr EntryNumber_t invalidEntry
Definition: IndexIntoFile.h:236
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
edm::RootEmbeddedFileSequence::readOneEvent
bool readOneEvent(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool recycleFiles)
Definition: RootEmbeddedFileSequence.cc:317
edm::RootInputFileSequence::lfnHash
size_t lfnHash() const
Definition: RootInputFileSequence.h:84
edm::ProductRegistry::updateFromInput
void updateFromInput(ProductList const &other)
Definition: ProductRegistry.cc:209
RootTree.h
cms::cuda::assert
assert(be >=bs)
edm::InputFileCatalog
Definition: InputFileCatalog.h:32
edm::errors::NoSecondaryFiles
Definition: EDMException.h:60
InputFileCatalog.h
remoteMonitoring_LED_IterMethod_cfg.skipBadFiles
skipBadFiles
Definition: remoteMonitoring_LED_IterMethod_cfg.py:24
edm::RootInputFileSequence::logicalFileName
std::string const & logicalFileName() const
Definition: RootInputFileSequence.h:78
edm::errors::NotFound
Definition: EDMException.h:57
edm::SecondaryEventIDAndFileInfo
Definition: SecondaryEventIDAndFileInfo.h:8
newFWLiteAna.found
found
Definition: newFWLiteAna.py:118
edm::Service::isAvailable
bool isAvailable() const
Definition: Service.h:40
heavyIonCSV_trainingSettings.idx
idx
Definition: heavyIonCSV_trainingSettings.py:5
edm::EmbeddedRootSource::skipBadFiles
bool skipBadFiles() const
Definition: EmbeddedRootSource.h:42
edm::RootEmbeddedFileSequence::readOneSpecified
void readOneSpecified(EventPrincipal &cache, size_t &fileNameHash, SecondaryEventIDAndFileInfo const &id)
Definition: RootEmbeddedFileSequence.cc:224
fileCollector.seed
seed
Definition: fileCollector.py:127
edm::RootInputFileSequence::initFile
void initFile(bool skipBadFiles)
Definition: RootInputFileSequence.h:53
edm::RootEmbeddedFileSequence::enablePrefetching_
bool enablePrefetching_
Definition: RootEmbeddedFileSequence.h:72
edm::RootEmbeddedFileSequence::initialNumberOfEventsToSkip_
int initialNumberOfEventsToSkip_
Definition: RootEmbeddedFileSequence.h:70
BXlumiParameters_cfi.lumi
lumi
Definition: BXlumiParameters_cfi.py:6
edm::RootInputFileSequence::RootFileSharedPtr
std::shared_ptr< RootFile > RootFileSharedPtr
Definition: RootInputFileSequence.h:52
edm::EmbeddedRootSource::runHelper
RunHelperBase * runHelper()
Definition: EmbeddedRootSource.h:47
edm::EventPrincipal
Definition: EventPrincipal.h:46
edm::RootEmbeddedFileSequence::readOneSequentialWithID
bool readOneSequentialWithID(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool)
Definition: RootEmbeddedFileSequence.cc:184
edm::RootEmbeddedFileSequence::closeFile_
void closeFile_() override
Definition: RootEmbeddedFileSequence.cc:113
Service.h
edm::RootInputFileSequence::setAtNextFile
void setAtNextFile()
Definition: RootInputFileSequence.h:73
submitPVResolutionJobs.count
count
Definition: submitPVResolutionJobs.py:352
edm::roottree::defaultCacheSize
unsigned const int defaultCacheSize
Definition: RootTree.h:47
ParameterSetDescription.h
utilities.cache
def cache(function)
Definition: utilities.py:3
mitigatedMETSequence_cff.U
U
Definition: mitigatedMETSequence_cff.py:36
edm::RootInputFileSequence
Definition: RootInputFileSequence.h:29
edm::ParameterSet
Definition: ParameterSet.h:47
edm::EmbeddedRootSource::bypassVersionCheck
bool bypassVersionCheck() const
Definition: EmbeddedRootSource.h:43
edmPickEvents.event
event
Definition: edmPickEvents.py:273
edm::RootInputFileSequence::skipToItemInNewFile
bool skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event)
Definition: RootInputFileSequence.cc:132
edm::RootInputFileSequence::noFiles
bool noFiles() const
Definition: RootInputFileSequence.h:66
edm::EmbeddedRootSource
Definition: EmbeddedRootSource.h:34
edm::RootInputFileSequence::rootFile
std::shared_ptr< RootFile const > rootFile() const
Definition: RootInputFileSequence.h:87
edm::Service
Definition: Service.h:30
createfilelist.int
int
Definition: createfilelist.py:10
edm::RootEmbeddedFileSequence::orderedProcessHistoryIDs_
std::vector< ProcessHistoryID > orderedProcessHistoryIDs_
Definition: RootEmbeddedFileSequence.h:64
edm::EmbeddedRootSource::treeMaxVirtualSize
int treeMaxVirtualSize() const
Definition: EmbeddedRootSource.h:45
edm::RootEmbeddedFileSequence::readOneRandom
bool readOneRandom(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *, bool)
Definition: RootEmbeddedFileSequence.cc:243
edm::RootEmbeddedFileSequence::makeRootFile
RootFileSharedPtr makeRootFile(std::shared_ptr< InputFile > filePtr) override
Definition: RootEmbeddedFileSequence.cc:124
RootFile.h
InputFile.h
edm::RootEmbeddedFileSequence::readOneSequential
bool readOneSequential(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *, bool recycleFiles)
Definition: RootEmbeddedFileSequence.cc:161
edm::RootEmbeddedFileSequence::input_
EmbeddedRootSource & input_
Definition: RootEmbeddedFileSequence.h:62
edm::RootEmbeddedFileSequence::fptr_
bool(RootEmbeddedFileSequence::* fptr_)(EventPrincipal &, size_t &, CLHEP::HepRandomEngine *, EventID const *, bool)
Definition: RootEmbeddedFileSequence.h:68
edm::RootEmbeddedFileSequence::endJob
void endJob()
Definition: RootEmbeddedFileSequence.cc:111
edm::RootEmbeddedFileSequence::treeCacheSize_
unsigned int treeCacheSize_
Definition: RootEmbeddedFileSequence.h:71
submitPVResolutionJobs.desc
string desc
Definition: submitPVResolutionJobs.py:251
writedatasetfile.run
run
Definition: writedatasetfile.py:27
edm::RootEmbeddedFileSequence::readOneRandomWithID
bool readOneRandomWithID(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool)
Definition: RootEmbeddedFileSequence.cc:275
InputSource.h
triggerObjects_cff.id
id
Definition: triggerObjects_cff.py:31
Exception
Definition: hltDiff.cc:246
edm::RootEmbeddedFileSequence::sequential_
bool sequential_
Definition: RootEmbeddedFileSequence.h:66
edm::InputType::SecondarySource
edm::RootEmbeddedFileSequence::RootEmbeddedFileSequence
RootEmbeddedFileSequence(ParameterSet const &pset, EmbeddedRootSource &input, InputFileCatalog const &catalog)
Definition: RootEmbeddedFileSequence.cc:26
edm::RootInputFileSequence::initTheFile
void initTheFile(bool skipBadFiles, bool deleteIndexIntoFile, InputSource *input, char const *inputTypeName, InputType inputType)
Definition: RootInputFileSequence.cc:182
edm::EmbeddedRootSource::productRegistryUpdate
ProductRegistry & productRegistryUpdate()
Definition: VectorInputSource.h:55
edm::RootInputFileSequence::indexesIntoFiles
std::vector< std::shared_ptr< IndexIntoFile > > const & indexesIntoFiles() const
Definition: RootInputFileSequence.h:82
edm::EmbeddedRootSource::nStreams
unsigned int nStreams() const
Definition: EmbeddedRootSource.h:44
edm::RootEmbeddedFileSequence::fillDescription
static void fillDescription(ParameterSetDescription &desc)
Definition: RootEmbeddedFileSequence.cc:327
edm::RootInputFileSequence::numberOfFiles
size_t numberOfFiles() const
Definition: RootInputFileSequence.h:68
ParameterSet.h
edm::EventID
Definition: EventID.h:31
hltrates_dqm_sourceclient-live_cfg.offset
offset
Definition: hltrates_dqm_sourceclient-live_cfg.py:82
RootEmbeddedFileSequence.h
edm::RootInputFileSequence::sequenceNumberOfFile
size_t sequenceNumberOfFile() const
Definition: RootInputFileSequence.h:67
edm::RootInputFileSequence::fileNames
std::vector< std::string > const & fileNames() const
Definition: RootInputFileSequence.h:76
edm::ProcessConfiguration
Definition: ProcessConfiguration.h:14
edm::RootEmbeddedFileSequence::enforceGUIDInFileName_
bool enforceGUIDInFileName_
Definition: RootEmbeddedFileSequence.h:73
muonDTDigis_cfi.pset
pset
Definition: muonDTDigis_cfi.py:27
edm::RootInputFileSequence::skipToItem
bool skipToItem(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash=0U, bool currentFileFirst=true)
Definition: RootInputFileSequence.cc:150
SiteLocalConfig.h
findQualityFiles.size
size
Write out results.
Definition: findQualityFiles.py:443
edm::RootInputFileSequence::fileCatalogItems
std::vector< FileCatalogItem > const & fileCatalogItems() const
Definition: RootInputFileSequence.cc:35