CMS 3D CMS Logo

RootEmbeddedFileSequence.cc
Go to the documentation of this file.
1 /*----------------------------------------------------------------------
2 ----------------------------------------------------------------------*/
3 #include "EmbeddedRootSource.h"
4 #include "InputFile.h"
5 #include "RootFile.h"
7 #include "RootTree.h"
8 
18 
19 #include "CLHEP/Random/RandFlat.h"
20 
21 #include <random>
22 
23 namespace edm {
24  class EventPrincipal;
25 
30  input_(input),
31  orderedProcessHistoryIDs_(),
32  sequential_(pset.getUntrackedParameter<bool>("sequential", false)),
33  sameLumiBlock_(pset.getUntrackedParameter<bool>("sameLumiBlock", false)),
34  fptr_(nullptr),
35  eventsRemainingInFile_(0),
36  // The default value provided as the second argument to the getUntrackedParameter function call
37  // is not used when the ParameterSet has been validated and the parameters are not optional
38  // in the description. This is currently true when PoolSource is the primary input source.
39  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
40  // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource
41  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
42  // and should be deleted from the code.
43  initialNumberOfEventsToSkip_(pset.getUntrackedParameter<unsigned int>("skipEvents", 0U)),
44  treeCacheSize_(pset.getUntrackedParameter<unsigned int>("cacheSize", roottree::defaultCacheSize)),
45  enablePrefetching_(false),
46  enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName", false)) {
47  if (noFiles()) {
49  << "RootEmbeddedFileSequence no input files specified for secondary input source.\n";
50  }
51  //
52  // The SiteLocalConfig controls the TTreeCache size and the prefetching settings.
54  if (pSLC.isAvailable()) {
55  if (treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) {
56  treeCacheSize_ = *(pSLC->sourceTTreeCacheSize());
57  }
58  enablePrefetching_ = pSLC->enablePrefetching();
59  }
60 
61  // Set the pointer to the function that reads an event.
62  if (sameLumiBlock_) {
63  if (sequential_) {
65  } else {
67  }
68  } else {
69  if (sequential_) {
71  } else {
73  }
74  }
75 
76  // For the secondary input source we do not stage in.
77  if (sequential_) {
78  // We open the first file
79  if (!atFirstFile()) {
81  initFile(false);
82  }
83  assert(rootFile());
84  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
85  if (!sameLumiBlock_) {
87  }
88  } else {
89  // We randomly choose the first file to open.
90  // We cannot use the random number service yet.
91  std::ifstream f("/dev/urandom");
92  unsigned int seed;
93  f.read(reinterpret_cast<char*>(&seed), sizeof(seed));
94  std::default_random_engine dre(seed);
95  size_t count = numberOfFiles();
96  std::uniform_int_distribution<int> distribution(0, count - 1);
97  while (!rootFile() && count != 0) {
98  --count;
99  int offset = distribution(dre);
102  }
103  }
104  if (rootFile()) {
105  input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList());
106  }
107  }
108 
110 
112 
114  // delete the RootFile object.
115  if (rootFile()) {
116  rootFile().reset();
117  }
118  }
119 
121  initTheFile(skipBadFiles, false, nullptr, "mixingFiles", InputType::SecondarySource);
122  }
123 
125  std::shared_ptr<InputFile> filePtr) {
126  size_t currentIndexIntoFile = sequenceNumberOfFile();
127  return std::make_shared<RootFile>(fileNames()[0],
129  logicalFileName(),
130  filePtr,
131  input_.nStreams(),
134  input_.runHelper(),
139  currentIndexIntoFile,
144  }
145 
147  // offset is decremented by the number of events actually skipped.
148  bool completed = rootFile()->skipEntries(offset);
149  while (!completed) {
150  setAtNextFile();
151  if (noMoreFiles()) {
152  setAtFirstFile();
153  }
154  initFile(false);
155  assert(rootFile());
156  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
157  completed = rootFile()->skipEntries(offset);
158  }
159  }
160 
162  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const*, bool recycleFiles) {
163  assert(rootFile());
164  rootFile()->nextEventEntry();
165  bool found = rootFile()->readCurrentEvent(cache);
166  if (!found) {
167  setAtNextFile();
168  if (noMoreFiles()) {
169  if (recycleFiles) {
170  setAtFirstFile();
171  } else {
172  return false;
173  }
174  }
175  initFile(false);
176  assert(rootFile());
177  rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry);
178  return readOneSequential(cache, fileNameHash, nullptr, nullptr, recycleFiles);
179  }
180  fileNameHash = lfnHash();
181  return true;
182  }
183 
185  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const* idp, bool recycleFiles) {
186  assert(idp);
187  EventID const& id = *idp;
190  if (offset > 0) {
191  assert(rootFile());
192  while (offset > 0) {
193  bool found = readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles);
194  if (!found) {
195  return false;
196  }
197  --offset;
198  }
199  }
200  assert(rootFile());
201  if (noMoreFiles() || rootFile()->indexIntoFileIter().run() != id.run() ||
202  rootFile()->indexIntoFileIter().lumi() != id.luminosityBlock()) {
203  bool found = skipToItem(id.run(), id.luminosityBlock(), 0, 0, false);
204  if (!found) {
205  return false;
206  }
207  }
208  assert(rootFile());
209  bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
210  if (found) {
211  found = rootFile()->readCurrentEvent(cache);
212  }
213  if (!found) {
214  found = skipToItemInNewFile(id.run(), id.luminosityBlock(), 0);
215  if (!found) {
216  return false;
217  }
218  return readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles);
219  }
220  fileNameHash = lfnHash();
221  return true;
222  }
223 
225  size_t& fileNameHash,
227  EventID const& id = idx.eventID();
228  bool found = skipToItem(id.run(), id.luminosityBlock(), id.event(), idx.fileNameHash());
229  if (!found) {
230  throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneSpecified(): Secondary Input files"
231  << " do not contain specified event:\n"
232  << id << "\n";
233  }
234  assert(rootFile());
235  found = rootFile()->readCurrentEvent(cache);
236  assert(found);
237  fileNameHash = idx.fileNameHash();
238  if (fileNameHash == 0U) {
239  fileNameHash = lfnHash();
240  }
241  }
242 
244  EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const*, bool) {
245  assert(rootFile());
246  assert(engine);
247  unsigned int currentSeqNumber = sequenceNumberOfFile();
248  while (eventsRemainingInFile_ == 0) {
249  unsigned int newSeqNumber = CLHEP::RandFlat::shootInt(engine, fileCatalogItems().size());
250  setAtFileSequenceNumber(newSeqNumber);
251  if (newSeqNumber != currentSeqNumber) {
252  initFile(false);
253  currentSeqNumber = newSeqNumber;
254  }
255  eventsRemainingInFile_ = rootFile()->eventTree().entries();
256  if (eventsRemainingInFile_ == 0) {
257  throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file "
258  << fileNames()[0] << " contains no events.\n";
259  }
260  rootFile()->setAtEventEntry(CLHEP::RandFlat::shootInt(engine, eventsRemainingInFile_) - 1);
261  }
262  rootFile()->nextEventEntry();
263 
264  bool found = rootFile()->readCurrentEvent(cache);
265  if (!found) {
266  rootFile()->setAtEventEntry(0);
267  found = rootFile()->readCurrentEvent(cache);
268  assert(found);
269  }
270  fileNameHash = lfnHash();
272  return true;
273  }
274 
276  size_t& fileNameHash,
277  CLHEP::HepRandomEngine* engine,
278  EventID const* idp,
279  bool recycleFiles) {
280  assert(engine);
281  assert(idp);
282  EventID const& id = *idp;
283  if (noMoreFiles() || !rootFile() || rootFile()->indexIntoFileIter().run() != id.run() ||
284  rootFile()->indexIntoFileIter().lumi() != id.luminosityBlock()) {
285  bool found = skipToItem(id.run(), id.luminosityBlock(), 0);
286  if (!found) {
287  return false;
288  }
289  int eventsInLumi = 0;
290  assert(rootFile());
291  while (rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock()))
292  ++eventsInLumi;
293  found = skipToItem(id.run(), id.luminosityBlock(), 0);
294  assert(found);
295  int eventInLumi = CLHEP::RandFlat::shootInt(engine, eventsInLumi);
296  for (int i = 0; i < eventInLumi; ++i) {
297  bool foundEventInLumi = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
298  assert(foundEventInLumi);
299  }
300  }
301  assert(rootFile());
302  bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
303  if (found) {
304  found = rootFile()->readCurrentEvent(cache);
305  }
306  if (!found) {
307  found = rootFile()->setEntryAtItem(id.run(), id.luminosityBlock(), 0);
308  if (!found) {
309  return false;
310  }
311  return readOneRandomWithID(cache, fileNameHash, engine, idp, recycleFiles);
312  }
313  fileNameHash = lfnHash();
314  return true;
315  }
316 
318  size_t& fileNameHash,
319  CLHEP::HepRandomEngine* engine,
320  EventID const* id,
321  bool recycleFiles) {
322  assert(!sameLumiBlock_ || id != nullptr);
323  assert(sequential_ || engine != nullptr);
324  return (this->*fptr_)(cache, fileNameHash, engine, id, recycleFiles);
325  }
326 
328  desc.addUntracked<bool>("sequential", false)
329  ->setComment(
330  "True: loopEvents() reads events sequentially from beginning of first file.\n"
331  "False: loopEvents() first reads events beginning at random event. New files also chosen randomly");
332  desc.addUntracked<bool>("sameLumiBlock", false)
333  ->setComment(
334  "True: loopEvents() reads events only in same lumi as the specified event.\n"
335  "False: loopEvents() reads events regardless of lumi.");
336  desc.addUntracked<unsigned int>("skipEvents", 0U)
337  ->setComment(
338  "Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False");
339  desc.addUntracked<unsigned int>("cacheSize", roottree::defaultCacheSize)
340  ->setComment("Size of ROOT TTree prefetch cache. Affects performance.");
341  desc.addUntracked<bool>("enforceGUIDInFileName", false)
342  ->setComment(
343  "True: file name part is required to be equal to the GUID of the file\n"
344  "False: file name can be anything");
345  }
346 } // namespace edm
edm::RootInputFileSequence::noMoreFiles
bool noMoreFiles() const
Definition: RootInputFileSequence.h:65
edm::RootEmbeddedFileSequence::initFile_
void initFile_(bool skipBadFiles) override
Definition: RootEmbeddedFileSequence.cc:120
ThinnedAssociationsHelper.h
electrons_cff.bool
bool
Definition: electrons_cff.py:372
mps_fire.i
i
Definition: mps_fire.py:355
input
static const std::string input
Definition: EdmProvDump.cc:48
edm::RootInputFileSequence::setAtFirstFile
void setAtFirstFile()
Definition: RootInputFileSequence.h:70
funct::false
false
Definition: Factorize.h:34
edm::EmbeddedRootSource::productSelectorRules
ProductSelectorRules const & productSelectorRules() const
Definition: EmbeddedRootSource.h:46
edm::RootInputFileSequence::setAtFileSequenceNumber
void setAtFileSequenceNumber(size_t offset)
Definition: RootInputFileSequence.h:71
getEcalConditions_orcoffint2r_cff.catalog
catalog
Definition: getEcalConditions_orcoffint2r_cff.py:40
edm::RootEmbeddedFileSequence::skipEntries
void skipEntries(unsigned int offset)
Definition: RootEmbeddedFileSequence.cc:146
f
double f[11][100]
Definition: MuScleFitUtils.cc:78
EmbeddedRootSource.h
BranchIDListHelper.h
edm::RootEmbeddedFileSequence::eventsRemainingInFile_
int eventsRemainingInFile_
Definition: RootEmbeddedFileSequence.h:69
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::RootInputFileSequence::atFirstFile
bool atFirstFile() const
Definition: RootInputFileSequence.h:63
edm::RootEmbeddedFileSequence::~RootEmbeddedFileSequence
~RootEmbeddedFileSequence() override
Definition: RootEmbeddedFileSequence.cc:109
edm::EmbeddedRootSource::processHistoryRegistryForUpdate
ProcessHistoryRegistry & processHistoryRegistryForUpdate()
Definition: VectorInputSource.h:57
BranchID.h
edm::RootEmbeddedFileSequence::sameLumiBlock_
bool sameLumiBlock_
Definition: RootEmbeddedFileSequence.h:67
edm::IndexIntoFile::invalidEntry
static constexpr EntryNumber_t invalidEntry
Definition: IndexIntoFile.h:236
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
edm::RootEmbeddedFileSequence::readOneEvent
bool readOneEvent(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool recycleFiles)
Definition: RootEmbeddedFileSequence.cc:317
SurveyInfoScenario_cff.distribution
distribution
Definition: SurveyInfoScenario_cff.py:346
edm::RootInputFileSequence::lfnHash
size_t lfnHash() const
Definition: RootInputFileSequence.h:84
edm::ProductRegistry::updateFromInput
void updateFromInput(ProductList const &other)
Definition: ProductRegistry.cc:221
RootTree.h
cms::cuda::assert
assert(be >=bs)
edm::InputFileCatalog
Definition: InputFileCatalog.h:32
edm::errors::NoSecondaryFiles
Definition: EDMException.h:60
InputFileCatalog.h
remoteMonitoring_LED_IterMethod_cfg.skipBadFiles
skipBadFiles
Definition: remoteMonitoring_LED_IterMethod_cfg.py:24
edm::RootInputFileSequence::logicalFileName
std::string const & logicalFileName() const
Definition: RootInputFileSequence.h:78
edm::errors::NotFound
Definition: EDMException.h:57
edm::SecondaryEventIDAndFileInfo
Definition: SecondaryEventIDAndFileInfo.h:8
newFWLiteAna.found
found
Definition: newFWLiteAna.py:118
training_settings.idx
idx
Definition: training_settings.py:16
edm::Service::isAvailable
bool isAvailable() const
Definition: Service.h:40
edm::EmbeddedRootSource::skipBadFiles
bool skipBadFiles() const
Definition: EmbeddedRootSource.h:42
edm::RootEmbeddedFileSequence::readOneSpecified
void readOneSpecified(EventPrincipal &cache, size_t &fileNameHash, SecondaryEventIDAndFileInfo const &id)
Definition: RootEmbeddedFileSequence.cc:224
edm::RootInputFileSequence::initFile
void initFile(bool skipBadFiles)
Definition: RootInputFileSequence.h:53
edm::RootEmbeddedFileSequence::enablePrefetching_
bool enablePrefetching_
Definition: RootEmbeddedFileSequence.h:72
edm::RootEmbeddedFileSequence::initialNumberOfEventsToSkip_
int initialNumberOfEventsToSkip_
Definition: RootEmbeddedFileSequence.h:70
BXlumiParameters_cfi.lumi
lumi
Definition: BXlumiParameters_cfi.py:6
edm::RootInputFileSequence::RootFileSharedPtr
std::shared_ptr< RootFile > RootFileSharedPtr
Definition: RootInputFileSequence.h:52
edm::EmbeddedRootSource::runHelper
RunHelperBase * runHelper()
Definition: EmbeddedRootSource.h:47
edm::EventPrincipal
Definition: EventPrincipal.h:46
edm::RootEmbeddedFileSequence::readOneSequentialWithID
bool readOneSequentialWithID(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool)
Definition: RootEmbeddedFileSequence.cc:184
edm::RootEmbeddedFileSequence::closeFile_
void closeFile_() override
Definition: RootEmbeddedFileSequence.cc:113
Service.h
edm::RootInputFileSequence::setAtNextFile
void setAtNextFile()
Definition: RootInputFileSequence.h:73
edm::roottree::defaultCacheSize
unsigned const int defaultCacheSize
Definition: RootTree.h:47
ParameterSetDescription.h
utilities.cache
def cache(function)
Definition: utilities.py:3
mitigatedMETSequence_cff.U
U
Definition: mitigatedMETSequence_cff.py:36
edm::RootInputFileSequence
Definition: RootInputFileSequence.h:29
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:100
edm::ParameterSet
Definition: ParameterSet.h:36
edm::EmbeddedRootSource::bypassVersionCheck
bool bypassVersionCheck() const
Definition: EmbeddedRootSource.h:43
KineDebug3::count
void count()
Definition: KinematicConstrainedVertexUpdatorT.h:21
edm::RootInputFileSequence::skipToItemInNewFile
bool skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event)
Definition: RootInputFileSequence.cc:132
edm::RootInputFileSequence::noFiles
bool noFiles() const
Definition: RootInputFileSequence.h:66
edm::EmbeddedRootSource
Definition: EmbeddedRootSource.h:34
edm::RootInputFileSequence::rootFile
std::shared_ptr< RootFile const > rootFile() const
Definition: RootInputFileSequence.h:87
edm::Service
Definition: Service.h:30
createfilelist.int
int
Definition: createfilelist.py:10
edm::RootEmbeddedFileSequence::orderedProcessHistoryIDs_
std::vector< ProcessHistoryID > orderedProcessHistoryIDs_
Definition: RootEmbeddedFileSequence.h:64
edm::EmbeddedRootSource::treeMaxVirtualSize
int treeMaxVirtualSize() const
Definition: EmbeddedRootSource.h:45
edm::RootEmbeddedFileSequence::readOneRandom
bool readOneRandom(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *, bool)
Definition: RootEmbeddedFileSequence.cc:243
edm::RootEmbeddedFileSequence::makeRootFile
RootFileSharedPtr makeRootFile(std::shared_ptr< InputFile > filePtr) override
Definition: RootEmbeddedFileSequence.cc:124
RootFile.h
InputFile.h
edm::RootEmbeddedFileSequence::readOneSequential
bool readOneSequential(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *, bool recycleFiles)
Definition: RootEmbeddedFileSequence.cc:161
edm::RootEmbeddedFileSequence::input_
EmbeddedRootSource & input_
Definition: RootEmbeddedFileSequence.h:62
edm::RootEmbeddedFileSequence::fptr_
bool(RootEmbeddedFileSequence::* fptr_)(EventPrincipal &, size_t &, CLHEP::HepRandomEngine *, EventID const *, bool)
Definition: RootEmbeddedFileSequence.h:68
edm::RootEmbeddedFileSequence::endJob
void endJob()
Definition: RootEmbeddedFileSequence.cc:111
edm::RootEmbeddedFileSequence::treeCacheSize_
unsigned int treeCacheSize_
Definition: RootEmbeddedFileSequence.h:71
writedatasetfile.run
run
Definition: writedatasetfile.py:27
edm::RootEmbeddedFileSequence::readOneRandomWithID
bool readOneRandomWithID(EventPrincipal &cache, size_t &fileNameHash, CLHEP::HepRandomEngine *, EventID const *id, bool)
Definition: RootEmbeddedFileSequence.cc:275
InputSource.h
triggerObjects_cff.id
id
Definition: triggerObjects_cff.py:31
Exception
Definition: hltDiff.cc:246
edm::RootEmbeddedFileSequence::sequential_
bool sequential_
Definition: RootEmbeddedFileSequence.h:66
edm::InputType::SecondarySource
edm::RootEmbeddedFileSequence::RootEmbeddedFileSequence
RootEmbeddedFileSequence(ParameterSet const &pset, EmbeddedRootSource &input, InputFileCatalog const &catalog)
Definition: RootEmbeddedFileSequence.cc:26
edm::RootInputFileSequence::initTheFile
void initTheFile(bool skipBadFiles, bool deleteIndexIntoFile, InputSource *input, char const *inputTypeName, InputType inputType)
Definition: RootInputFileSequence.cc:182
edm::EmbeddedRootSource::productRegistryUpdate
ProductRegistry & productRegistryUpdate()
Definition: VectorInputSource.h:55
edm::RootInputFileSequence::indexesIntoFiles
std::vector< std::shared_ptr< IndexIntoFile > > const & indexesIntoFiles() const
Definition: RootInputFileSequence.h:82
edm::EmbeddedRootSource::nStreams
unsigned int nStreams() const
Definition: EmbeddedRootSource.h:44
edm::RootEmbeddedFileSequence::fillDescription
static void fillDescription(ParameterSetDescription &desc)
Definition: RootEmbeddedFileSequence.cc:327
edm::RootInputFileSequence::numberOfFiles
size_t numberOfFiles() const
Definition: RootInputFileSequence.h:68
ParameterSet.h
edm::EventID
Definition: EventID.h:31
hltrates_dqm_sourceclient-live_cfg.offset
offset
Definition: hltrates_dqm_sourceclient-live_cfg.py:82
RootEmbeddedFileSequence.h
event
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger but the state exists so we define the behavior If all triggers are the negative criterion will lead to accepting the event (this again matches the behavior of "!*" before the partial wildcard feature was incorporated). The per-event "cost" of each negative criterion with multiple relevant triggers is about the same as !* was in the past
edm::RootInputFileSequence::sequenceNumberOfFile
size_t sequenceNumberOfFile() const
Definition: RootInputFileSequence.h:67
edm::RootInputFileSequence::fileNames
std::vector< std::string > const & fileNames() const
Definition: RootInputFileSequence.h:76
edm::ProcessConfiguration
Definition: ProcessConfiguration.h:14
edm::RootEmbeddedFileSequence::enforceGUIDInFileName_
bool enforceGUIDInFileName_
Definition: RootEmbeddedFileSequence.h:73
SurveyInfoScenario_cff.seed
seed
Definition: SurveyInfoScenario_cff.py:295
muonDTDigis_cfi.pset
pset
Definition: muonDTDigis_cfi.py:27
edm::RootInputFileSequence::skipToItem
bool skipToItem(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash=0U, bool currentFileFirst=true)
Definition: RootInputFileSequence.cc:150
SiteLocalConfig.h
findQualityFiles.size
size
Write out results.
Definition: findQualityFiles.py:443
edm::RootInputFileSequence::fileCatalogItems
std::vector< FileCatalogItem > const & fileCatalogItems() const
Definition: RootInputFileSequence.cc:35