CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_13_patch3/src/IOPool/Input/src/RootInputFileSequence.cc

Go to the documentation of this file.
00001 /*----------------------------------------------------------------------
00002 ----------------------------------------------------------------------*/
00003 #include "DuplicateChecker.h"
00004 #include "PoolSource.h"
00005 #include "RootFile.h"
00006 #include "RootInputFileSequence.h"
00007 #include "RootTree.h"
00008 
00009 #include "DataFormats/Provenance/interface/ProductRegistry.h"
00010 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
00011 #include "FWCore/Framework/interface/EventPrincipal.h"
00012 #include "FWCore/Framework/interface/FileBlock.h"
00013 #include "FWCore/Framework/src/PrincipalCache.h"
00014 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00015 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00016 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
00017 #include "FWCore/ServiceRegistry/interface/Service.h"
00018 #include "FWCore/Utilities/interface/RandomNumberGenerator.h"
00019 #include "Utilities/StorageFactory/interface/StorageFactory.h"
00020 
00021 #include "CLHEP/Random/RandFlat.h"
00022 #include "InputFile.h"
00023 #include "TSystem.h"
00024 
00025 namespace edm {
00026   RootInputFileSequence::RootInputFileSequence(
00027                 ParameterSet const& pset,
00028                 PoolSource const& input,
00029                 InputFileCatalog const& catalog,
00030                 PrincipalCache& cache,
00031                 InputType::InputType inputType) :
00032     input_(input),
00033     inputType_(inputType),
00034     catalog_(catalog),
00035     firstFile_(true),
00036     fileIterBegin_(fileCatalogItems().begin()),
00037     fileIterEnd_(fileCatalogItems().end()),
00038     fileIter_(fileIterEnd_),
00039     fileIterLastOpened_(fileIterEnd_),
00040     rootFile_(),
00041     parametersMustMatch_(BranchDescription::Permissive),
00042     branchesMustMatch_(BranchDescription::Permissive),
00043     flatDistribution_(),
00044     indexesIntoFiles_(fileCatalogItems().size()),
00045     orderedProcessHistoryIDs_(),
00046     eventSkipperByID_(inputType == InputType::Primary ? EventSkipperByID::create(pset).release() : 0),
00047     eventsRemainingInFile_(0),
00048     // The default value provided as the second argument to the getUntrackedParameter function call
00049     // is not used when the ParameterSet has been validated and the parameters are not optional
00050     // in the description.  This is currently true when PoolSource is the primary input source.
00051     // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
00052     // yet, so the ParameterSet does not get validated yet.  As soon as all the modules with a SecSource
00053     // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can
00054     // and should be deleted from the code.
00055     numberOfEventsToSkip_(inputType == InputType::Primary ? pset.getUntrackedParameter<unsigned int>("skipEvents", 0U) : 0U),
00056     noEventSort_(inputType == InputType::Primary ? pset.getUntrackedParameter<bool>("noEventSort", true) : false),
00057     skipBadFiles_(pset.getUntrackedParameter<bool>("skipBadFiles", false)),
00058     treeCacheSize_(noEventSort_ ? pset.getUntrackedParameter<unsigned int>("cacheSize", roottree::defaultCacheSize) : 0U),
00059     treeMaxVirtualSize_(pset.getUntrackedParameter<int>("treeMaxVirtualSize", -1)),
00060     setRun_(pset.getUntrackedParameter<unsigned int>("setRunNumber", 0U)),
00061     groupSelectorRules_(pset, "inputCommands", "InputSource"),
00062     duplicateChecker_(inputType == InputType::Primary ? new DuplicateChecker(pset) : 0),
00063     dropDescendants_(pset.getUntrackedParameter<bool>("dropDescendantsOfDroppedBranches", inputType != InputType::SecondarySource)),
00064     labelRawDataLikeMC_(pset.getUntrackedParameter<bool>("labelRawDataLikeMC", true)),
00065     usingGoToEvent_(false) {
00066 
00067     //we now allow the site local config to specify what the TTree cache size should be
00068     Service<SiteLocalConfig> pSLC;
00069     if(treeCacheSize_ != 0U && pSLC.isAvailable() && pSLC->sourceTTreeCacheSize()) {
00070       treeCacheSize_ = *(pSLC->sourceTTreeCacheSize());
00071     }
00072 
00073     if(inputType_ == InputType::Primary) {
00074       //NOTE: we do not want to stage in secondary files since we can be given a list of
00075       // thousands of files and prestaging all those files can cause a site to fail
00076       StorageFactory *factory = StorageFactory::get();
00077       for(fileIter_ = fileIterBegin_; fileIter_ != fileIterEnd_; ++fileIter_) {
00078         factory->activateTimeout(fileIter_->fileName());
00079         factory->stagein(fileIter_->fileName());
00080       }
00081     }
00082 
00083     std::string parametersMustMatch = pset.getUntrackedParameter<std::string>("parametersMustMatch", std::string("permissive"));
00084     if(parametersMustMatch == std::string("strict")) parametersMustMatch_ = BranchDescription::Strict;
00085 
00086     std::string branchesMustMatch = pset.getUntrackedParameter<std::string>("branchesMustMatch", std::string("permissive"));
00087     if(branchesMustMatch == std::string("strict")) branchesMustMatch_ = BranchDescription::Strict;
00088 
00089     if(inputType != InputType::SecondarySource) {
00090       for(fileIter_ = fileIterBegin_; fileIter_ != fileIterEnd_; ++fileIter_) {
00091         initFile(skipBadFiles_);
00092         if(rootFile_) break;
00093       }
00094       if(rootFile_) {
00095         productRegistryUpdate().updateFromInput(rootFile_->productRegistry()->productList());
00096         if(numberOfEventsToSkip_ != 0) {
00097           skipEvents(numberOfEventsToSkip_, cache);
00098         }
00099       }
00100     }
00101   }
00102 
00103   std::vector<FileCatalogItem> const&
00104   RootInputFileSequence::fileCatalogItems() const {
00105     return catalog_.fileCatalogItems();
00106   }
00107 
00108   void
00109   RootInputFileSequence::endJob() {
00110     closeFile_();
00111   }
00112 
00113   boost::shared_ptr<FileBlock>
00114   RootInputFileSequence::readFile_(PrincipalCache& cache) {
00115     if(firstFile_) {
00116       // The first input file has already been opened.
00117       firstFile_ = false;
00118       if(!rootFile_) {
00119         initFile(skipBadFiles_);
00120       }
00121     } else {
00122       if(!nextFile(cache)) {
00123         assert(0);
00124       }
00125     }
00126     if(!rootFile_) {
00127       return boost::shared_ptr<FileBlock>(new FileBlock);
00128     }
00129     return rootFile_->createFileBlock();
00130   }
00131 
00132   void RootInputFileSequence::closeFile_() {
00133     // close the currently open file, if any, and delete the RootFile object.
00134     if(rootFile_) {
00135       if(inputType_ != InputType::SecondarySource) {
00136         std::unique_ptr<InputSource::FileCloseSentry>
00137         sentry((inputType_ == InputType::Primary) ? new InputSource::FileCloseSentry(input_) : 0);
00138         rootFile_->close();
00139         if(duplicateChecker_) duplicateChecker_->inputFileClosed();
00140       }
00141       rootFile_.reset();
00142     }
00143   }
00144 
00145   void RootInputFileSequence::initFile(bool skipBadFiles) {
00146     // We are really going to close the open file.
00147 
00148     // If this is the primary sequence, we are not duplicate checking across files
00149     // and we are not using random access to find events, then we can delete the
00150     // IndexIntoFile for the file we are closing. If we can't delete all of it,
00151     // then we can delete the parts we do not need.
00152     if(fileIterLastOpened_ != fileIterEnd_) {
00153       size_t currentIndexIntoFile = fileIterLastOpened_ - fileIterBegin_;
00154       bool needIndexesForDuplicateChecker = duplicateChecker_ && duplicateChecker_->checkingAllFiles() && !duplicateChecker_->checkDisabled();
00155       bool deleteIndexIntoFile = inputType_ == InputType::Primary &&
00156                                  !needIndexesForDuplicateChecker &&
00157                                  !usingGoToEvent_;
00158       if(deleteIndexIntoFile) {
00159         indexesIntoFiles_[currentIndexIntoFile].reset();
00160       } else {
00161         if(indexesIntoFiles_[currentIndexIntoFile]) indexesIntoFiles_[currentIndexIntoFile]->inputFileClosed();
00162       }
00163       fileIterLastOpened_ = fileIterEnd_;
00164     }
00165     closeFile_();
00166 
00167     if(fileIter_ == fileIterEnd_) {
00168       // No files specified
00169       return;
00170     }
00171 
00172     // Check if the logical file name was found.
00173     if(fileIter_->fileName().empty()) {
00174       // LFN not found in catalog.
00175       InputFile::reportSkippedFile(fileIter_->fileName(), fileIter_->logicalFileName());
00176       if(!skipBadFiles) {
00177         throw cms::Exception("LogicalFileNameNotFound", "RootInputFileSequence::initFile()\n")
00178           << "Logical file name '" << fileIter_->logicalFileName() << "' was not found in the file catalog.\n"
00179           << "If you wanted a local file, you forgot the 'file:' prefix\n"
00180           << "before the file name in your configuration file.\n";
00181       }
00182       LogWarning("") << "Input logical file: " << fileIter_->logicalFileName() << " was not found in the catalog, and will be skipped.\n";
00183       return;
00184     }
00185 
00186     // Determine whether we have a fallback URL specified; if so, prepare it;
00187     // Only valid if it is non-empty and differs from the original filename.
00188     std::string fallbackName = fileIter_->fallbackFileName();
00189     bool hasFallbackUrl = !fallbackName.empty() && fallbackName != fileIter_->fileName();
00190 
00191     boost::shared_ptr<InputFile> filePtr;
00192     try {
00193       std::unique_ptr<InputSource::FileOpenSentry>
00194         sentry(inputType_ == InputType::Primary ? new InputSource::FileOpenSentry(input_) : 0);
00195       filePtr.reset(new InputFile(gSystem->ExpandPathName(fileIter_->fileName().c_str()), "  Initiating request to open file "));
00196     }
00197     catch (cms::Exception const& e) {
00198       if(!skipBadFiles) {
00199         if(hasFallbackUrl) {
00200           std::ostringstream out;
00201           out << e.explainSelf();
00202           std::string pfn(gSystem->ExpandPathName(fallbackName.c_str()));
00203           InputFile::reportFallbackAttempt(pfn, fileIter_->logicalFileName(), out.str());
00204         } else {
00205           InputFile::reportSkippedFile(fileIter_->fileName(), fileIter_->logicalFileName());
00206           Exception ex(errors::FileOpenError, "", e);
00207           ex.addContext("Calling RootInputFileSequence::initFile()");
00208           ex.clearMessage();
00209           ex << "Input file " << fileIter_->fileName() << " was not found, could not be opened, or is corrupted.\n";
00210           throw ex;
00211         }
00212       }
00213     }
00214     if(!filePtr && (hasFallbackUrl)) {
00215       try {
00216         std::unique_ptr<InputSource::FileOpenSentry>
00217           sentry(inputType_ == InputType::Primary ? new InputSource::FileOpenSentry(input_) : 0);
00218         filePtr.reset(new InputFile(gSystem->ExpandPathName(fallbackName.c_str()), "  Fallback request to file "));
00219       }
00220       catch (cms::Exception const& e) {
00221         if(!skipBadFiles) {
00222           InputFile::reportSkippedFile(fileIter_->fileName(), fileIter_->logicalFileName());
00223           Exception ex(errors::FallbackFileOpenError, "", e);
00224           ex.addContext("Calling RootInputFileSequence::initFile()");
00225           ex.clearMessage();
00226           ex << "Input file " << fileIter_->fileName() << " was not found, could not be opened, or is corrupted.\n";
00227           ex << "Fallback Input file " << fallbackName << " also was not found, could not be opened, or is corrupted.\n";
00228           throw ex;
00229         }
00230       }
00231     }
00232     if(filePtr) {
00233       std::vector<boost::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile = fileIter_ - fileIterBegin_;
00234       rootFile_ = RootFileSharedPtr(new RootFile(fileIter_->fileName(),
00235           processConfiguration(), fileIter_->logicalFileName(), filePtr,
00236           eventSkipperByID_, numberOfEventsToSkip_ != 0,
00237           remainingEvents(), remainingLuminosityBlocks(), treeCacheSize_, treeMaxVirtualSize_,
00238           input_.processingMode(),
00239           setRun_,
00240           noEventSort_,
00241           groupSelectorRules_,
00242           inputType_,
00243           duplicateChecker_,
00244           dropDescendants_,
00245           indexesIntoFiles_,
00246           currentIndexIntoFile,
00247           orderedProcessHistoryIDs_,
00248           labelRawDataLikeMC_,
00249           usingGoToEvent_));
00250 
00251       fileIterLastOpened_ = fileIter_;
00252       indexesIntoFiles_[currentIndexIntoFile] = rootFile_->indexIntoFileSharedPtr();
00253       char const* inputType = 0;
00254       switch(inputType_) {
00255       case InputType::Primary: inputType = "primaryFiles"; break;
00256       case InputType::SecondaryFile: inputType = "secondaryFiles"; break;
00257       case InputType::SecondarySource: inputType = "mixingFiles"; break;
00258       }
00259       rootFile_->reportOpened(inputType);
00260     } else {
00261       InputFile::reportSkippedFile(fileIter_->fileName(), fileIter_->logicalFileName());
00262       if(!skipBadFiles) {
00263         throw Exception(errors::FileOpenError) <<
00264            "RootInputFileSequence::initFile(): Input file " << fileIter_->fileName() << " was not found or could not be opened.\n";
00265       }
00266       LogWarning("") << "Input file: " << fileIter_->fileName() << " was not found or could not be opened, and will be skipped.\n";
00267     }
00268   }
00269 
00270   boost::shared_ptr<ProductRegistry const>
00271   RootInputFileSequence::fileProductRegistry() const {
00272     assert(rootFile_);
00273     return rootFile_->productRegistry();
00274   }
00275 
00276   bool RootInputFileSequence::nextFile(PrincipalCache& cache) {
00277     if(fileIter_ != fileIterEnd_) ++fileIter_;
00278     if(fileIter_ == fileIterEnd_) {
00279       if(inputType_ == InputType::Primary) {
00280         return false;
00281       } else {
00282         fileIter_ = fileIterBegin_;
00283       }
00284     }
00285 
00286     initFile(skipBadFiles_);
00287 
00288     if(inputType_ == InputType::Primary && rootFile_) {
00289       size_t size = productRegistry()->size();
00290       // make sure the new product registry is compatible with the main one
00291       std::string mergeInfo = productRegistryUpdate().merge(*rootFile_->productRegistry(),
00292                                                             fileIter_->fileName(),
00293                                                             parametersMustMatch_,
00294                                                             branchesMustMatch_);
00295       if(!mergeInfo.empty()) {
00296         throw Exception(errors::MismatchedInputFiles,"RootInputFileSequence::nextFile()") << mergeInfo;
00297       }
00298       if(productRegistry()->size() > size) {
00299         cache.adjustIndexesAfterProductRegistryAddition();
00300       }
00301       cache.adjustEventToNewProductRegistry(productRegistry());
00302     }
00303     return true;
00304   }
00305 
00306   bool RootInputFileSequence::previousFile(PrincipalCache& cache) {
00307     if(fileIter_ == fileIterBegin_) {
00308       if(inputType_ == InputType::Primary) {
00309         return false;
00310       } else {
00311         fileIter_ = fileIterEnd_;
00312       }
00313     }
00314     --fileIter_;
00315 
00316     initFile(false);
00317 
00318     if(inputType_ == InputType::Primary && rootFile_) {
00319       size_t size = productRegistry()->size();
00320       // make sure the new product registry is compatible to the main one
00321       std::string mergeInfo = productRegistryUpdate().merge(*rootFile_->productRegistry(),
00322                                                             fileIter_->fileName(),
00323                                                             parametersMustMatch_,
00324                                                             branchesMustMatch_);
00325       if(!mergeInfo.empty()) {
00326         throw Exception(errors::MismatchedInputFiles,"RootInputFileSequence::previousEvent()") << mergeInfo;
00327       }
00328       if(productRegistry()->size() > size) {
00329         cache.adjustIndexesAfterProductRegistryAddition();
00330       }
00331       cache.adjustEventToNewProductRegistry(productRegistry());
00332     }
00333     if(rootFile_) rootFile_->setToLastEntry();
00334     return true;
00335   }
00336 
00337   RootInputFileSequence::~RootInputFileSequence() {
00338   }
00339 
00340   boost::shared_ptr<RunAuxiliary>
00341   RootInputFileSequence::readRunAuxiliary_() {
00342     return rootFile_->readRunAuxiliary_();
00343   }
00344 
00345   boost::shared_ptr<LuminosityBlockAuxiliary>
00346   RootInputFileSequence::readLuminosityBlockAuxiliary_() {
00347     return rootFile_->readLuminosityBlockAuxiliary_();
00348   }
00349 
00350   boost::shared_ptr<RunPrincipal>
00351   RootInputFileSequence::readRun_(boost::shared_ptr<RunPrincipal> rpCache) {
00352     return rootFile_->readRun_(rpCache);
00353   }
00354 
00355   boost::shared_ptr<LuminosityBlockPrincipal>
00356   RootInputFileSequence::readLuminosityBlock_(boost::shared_ptr<LuminosityBlockPrincipal> lbCache) {
00357     return rootFile_->readLumi(lbCache);
00358   }
00359 
00360   // readEvent() is responsible for setting up the EventPrincipal.
00361   //
00362   //   1. create an EventPrincipal with a unique EventID
00363   //   2. For each entry in the provenance, put in one Group,
00364   //      holding the Provenance for the corresponding EDProduct.
00365   //   3. set up the caches in the EventPrincipal to know about this
00366   //      Group.
00367   //
00368   // We do *not* create the EDProduct instance (the equivalent of reading
00369   // the branch containing this EDProduct. That will be done by the Delayed Reader,
00370   //  when it is asked to do so.
00371   //
00372 
00373   EventPrincipal*
00374   RootInputFileSequence::readEvent(EventPrincipal& cache, boost::shared_ptr<LuminosityBlockPrincipal> lb) {
00375     return rootFile_->readEvent(cache, lb);
00376   }
00377 
00378   InputSource::ItemType
00379   RootInputFileSequence::getNextItemType() {
00380     if(fileIter_ == fileIterEnd_) {
00381       return InputSource::IsStop;
00382     }
00383     if(firstFile_) {
00384       return InputSource::IsFile;
00385     }
00386     if(rootFile_) {
00387       IndexIntoFile::EntryType entryType = rootFile_->getNextEntryTypeWanted();
00388       if(entryType == IndexIntoFile::kEvent) {
00389         return InputSource::IsEvent;
00390       } else if(entryType == IndexIntoFile::kLumi) {
00391         return InputSource::IsLumi;
00392       } else if(entryType == IndexIntoFile::kRun) {
00393         return InputSource::IsRun;
00394       }
00395       assert(entryType == IndexIntoFile::kEnd);
00396     }
00397     if(fileIter_ + 1 == fileIterEnd_) {
00398       return InputSource::IsStop;
00399     }
00400     return InputSource::IsFile;
00401   }
00402 
00403   // Rewind to before the first event that was read.
00404   void
00405   RootInputFileSequence::rewind_() {
00406     if(fileIter_ != fileIterBegin_) {
00407       closeFile_();
00408       fileIter_ = fileIterBegin_;
00409     }
00410     if(!rootFile_) {
00411       initFile(false);
00412     }
00413     rewindFile();
00414     firstFile_ = true;
00415   }
00416 
00417   // Rewind to the beginning of the current file
00418   void
00419   RootInputFileSequence::rewindFile() {
00420     if(rootFile_) rootFile_->rewind();
00421   }
00422 
00423   void
00424   RootInputFileSequence::reset(PrincipalCache& cache) {
00425     //NOTE: Need to handle duplicate checker
00426     // Also what if skipBadFiles_==true and the first time we succeeded but after a reset we fail?
00427     if(inputType_ != InputType::SecondarySource) {
00428       firstFile_ = true;
00429       for(fileIter_ = fileIterBegin_; fileIter_ != fileIterEnd_; ++fileIter_) {
00430         initFile(skipBadFiles_);
00431         if(rootFile_) break;
00432       }
00433       if(rootFile_) {
00434         if(numberOfEventsToSkip_ != 0) {
00435           skipEvents(numberOfEventsToSkip_, cache);
00436         }
00437       }
00438     }
00439   }
00440 
00441   // Advance "offset" events.  Offset can be positive or negative (or zero).
00442   bool
00443   RootInputFileSequence::skipEvents(int offset, PrincipalCache& cache) {
00444     assert (numberOfEventsToSkip_ == 0 || numberOfEventsToSkip_ == offset);
00445     numberOfEventsToSkip_ = offset;
00446     while(numberOfEventsToSkip_ != 0) {
00447       bool atEnd = rootFile_->skipEvents(numberOfEventsToSkip_);
00448       if((numberOfEventsToSkip_ > 0 || atEnd) && !nextFile(cache)) {
00449         numberOfEventsToSkip_ = 0;
00450         return false;
00451       }
00452       if(numberOfEventsToSkip_ < 0 && !previousFile(cache)) {
00453         numberOfEventsToSkip_ = 0;
00454         fileIter_ = fileIterEnd_;
00455         return false;
00456       }
00457     }
00458     return true;
00459   }
00460 
00461   bool
00462   RootInputFileSequence::goToEvent(EventID const& eventID) {
00463     usingGoToEvent_ = true;
00464     if(rootFile_) {
00465       if(rootFile_->goToEvent(eventID)) {
00466         return true;
00467       }
00468       // If only one input file, give up now, to save time.
00469       if(rootFile_ && indexesIntoFiles_.size() == 1) {
00470         return false;
00471       }
00472       // Save the current file and position so that we can restore them
00473       // if we fail to restore the desired event
00474       bool closedOriginalFile = false;
00475       std::vector<FileCatalogItem>::const_iterator originalFile = fileIter_;
00476       IndexIntoFile::IndexIntoFileItr originalPosition = rootFile_->indexIntoFileIter();
00477 
00478       // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files.
00479       typedef std::vector<boost::shared_ptr<IndexIntoFile> >::const_iterator Iter;
00480       for(Iter it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
00481         if(*it && (*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) {
00482           // We found it. Close the currently open file, and open the correct one.
00483           fileIter_ = fileIterBegin_ + (it - indexesIntoFiles_.begin());
00484           initFile(false);
00485           // Now get the item from the correct file.
00486           bool found = rootFile_->goToEvent(eventID);
00487           assert (found);
00488           return true;
00489         }
00490       }
00491       // Look for item in files not yet opened.
00492       for(Iter it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
00493         if(!*it) {
00494           fileIter_ = fileIterBegin_ + (it - indexesIntoFiles_.begin());
00495           initFile(false);
00496           closedOriginalFile = true;
00497           if((*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) {
00498             if  (rootFile_->goToEvent(eventID)) {
00499               return true;
00500             }
00501           }
00502         }
00503       }
00504       if(closedOriginalFile) {
00505         fileIter_ = originalFile;
00506         initFile(false);
00507         rootFile_->setPosition(originalPosition);
00508       }
00509     }
00510     return false;
00511   }
00512 
00513   bool
00514   RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) {
00515     // Look for item in files not yet opened.
00516     typedef std::vector<boost::shared_ptr<IndexIntoFile> >::const_iterator Iter;
00517     for(Iter it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
00518       if(!*it) {
00519         fileIter_ = fileIterBegin_ + (it - indexesIntoFiles_.begin());
00520         initFile(false);
00521         bool found = rootFile_->setEntryAtItem(run, lumi, event);
00522         if(found) {
00523           return true;
00524         }
00525       }
00526     }
00527     // Not found
00528     return false;
00529   }
00530 
00531   bool
00532   RootInputFileSequence::skipToItem(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, bool currentFileFirst) {
00533     // Attempt to find item in currently open input file.
00534     bool found = currentFileFirst && rootFile_ && rootFile_->setEntryAtItem(run, lumi, event);
00535     if(!found) {
00536       // If only one input file, give up now, to save time.
00537       if(currentFileFirst && rootFile_ && indexesIntoFiles_.size() == 1) {
00538         return false;
00539       }
00540       // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files.
00541       typedef std::vector<boost::shared_ptr<IndexIntoFile> >::const_iterator Iter;
00542       for(Iter it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
00543         if(*it && (*it)->containsItem(run, lumi, event)) {
00544           // We found it. Close the currently open file, and open the correct one.
00545           std::vector<FileCatalogItem>::const_iterator currentIter = fileIter_;
00546           fileIter_ = fileIterBegin_ + (it - indexesIntoFiles_.begin());
00547           if(fileIter_ != currentIter) {
00548             initFile(false);
00549           }
00550           // Now get the item from the correct file.
00551           found = rootFile_->setEntryAtItem(run, lumi, event);
00552           assert (found);
00553           return true;
00554         }
00555       }
00556       // Look for item in files not yet opened.
00557       return skipToItemInNewFile(run, lumi, event);
00558     }
00559     return true;
00560   }
00561 
00562   ProcessConfiguration const&
00563   RootInputFileSequence::processConfiguration() const {
00564     return input_.processConfiguration();
00565   }
00566 
00567   int
00568   RootInputFileSequence::remainingEvents() const {
00569     return input_.remainingEvents();
00570   }
00571 
00572   int
00573   RootInputFileSequence::remainingLuminosityBlocks() const {
00574     return input_.remainingLuminosityBlocks();
00575   }
00576 
00577   ProductRegistry &
00578   RootInputFileSequence::productRegistryUpdate() const{
00579     return input_.productRegistryUpdate();
00580   }
00581 
00582   boost::shared_ptr<ProductRegistry const>
00583   RootInputFileSequence::productRegistry() const{
00584     return input_.productRegistry();
00585   }
00586 
00587   void
00588   RootInputFileSequence::dropUnwantedBranches_(std::vector<std::string> const& wantedBranches) {
00589     std::vector<std::string> rules;
00590     rules.reserve(wantedBranches.size() + 1);
00591     rules.emplace_back("drop *");
00592     for(std::vector<std::string>::const_iterator it = wantedBranches.begin(), itEnd = wantedBranches.end();
00593         it != itEnd; ++it) {
00594       rules.push_back("keep " + *it + "_*");
00595     }
00596     ParameterSet pset;
00597     pset.addUntrackedParameter("inputCommands", rules);
00598     groupSelectorRules_ = GroupSelectorRules(pset, "inputCommands", "InputSource");
00599   }
00600 
00601   EventPrincipal*
00602   RootInputFileSequence::readOneSequential() {
00603     skipBadFiles_ = false;
00604     if(fileIter_ == fileIterEnd_ || !rootFile_) {
00605       if(fileIterEnd_ == fileIterBegin_) {
00606         throw Exception(errors::Configuration) << "RootInputFileSequence::readOneSequential(): no input files specified.\n";
00607       }
00608       fileIter_ = fileIterBegin_;
00609       initFile(false);
00610       rootFile_->setAtEventEntry(-1);
00611     }
00612     rootFile_->nextEventEntry();
00613     EventPrincipal* ep = rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal());
00614     if(ep == 0) {
00615       ++fileIter_;
00616       if(fileIter_ == fileIterEnd_) {
00617         return 0;
00618       }
00619       initFile(false);
00620       rootFile_->setAtEventEntry(-1);
00621       return readOneSequential();
00622     }
00623     return ep;
00624   }
00625 
00626   EventPrincipal*
00627   RootInputFileSequence::readOneSequentialWithID(LuminosityBlockID const& id) {
00628     if(fileIterEnd_ == fileIterBegin_) {
00629       throw Exception(errors::Configuration) << "RootInputFileSequence::readOneSequentialWithID(): no input files specified.\n";
00630     }
00631     skipBadFiles_ = false;
00632     if(fileIter_ == fileIterEnd_ || !rootFile_ ||
00633         rootFile_->indexIntoFileIter().run() != id.run() || 
00634         rootFile_->indexIntoFileIter().lumi() != id.luminosityBlock()) {
00635       bool found = skipToItem(id.run(), id.luminosityBlock(), 0, false);
00636       if(!found) {
00637         return 0;
00638       }
00639     }
00640     bool nextFound = rootFile_->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
00641     EventPrincipal* ep = (nextFound ? rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal()) : 0);
00642     if(ep == 0) {
00643       bool found = skipToItemInNewFile(id.run(), id.luminosityBlock(), 0);
00644       if(found) {
00645         return readOneSequentialWithID(id);
00646       }
00647     }
00648     return ep;
00649   }
00650 
00651   EventPrincipal*
00652   RootInputFileSequence::readOneSpecified(EventID const& id) {
00653     skipBadFiles_ = false;
00654     bool found = skipToItem(id.run(), id.luminosityBlock(), id.event());
00655     if(!found) {
00656       throw Exception(errors::NotFound) <<
00657          "RootInputFileSequence::readOneSpecified(): Secondary Input file " <<
00658          fileIter_->fileName() <<
00659          " does not contain specified event:\n" << id << "\n";
00660     }
00661     EventPrincipal* ep = rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal());
00662     assert(ep != 0);
00663     return ep;
00664   }
00665 
00666   EventPrincipal*
00667   RootInputFileSequence::readOneRandom() {
00668     if(fileIterEnd_ == fileIterBegin_) {
00669       throw Exception(errors::Configuration) << "RootInputFileSequence::readOneRandom(): no input files specified.\n";
00670     }
00671     if(!flatDistribution_) {
00672       Service<RandomNumberGenerator> rng;
00673       CLHEP::HepRandomEngine& engine = rng->getEngine();
00674       flatDistribution_.reset(new CLHEP::RandFlat(engine));
00675     }
00676     skipBadFiles_ = false;
00677     unsigned int currentSeqNumber = fileIter_ - fileIterBegin_;
00678     while(eventsRemainingInFile_ == 0) {
00679       fileIter_ = fileIterBegin_ + flatDistribution_->fireInt(fileCatalogItems().size());
00680       unsigned int newSeqNumber = fileIter_ - fileIterBegin_;
00681       if(newSeqNumber != currentSeqNumber) {
00682         initFile(false);
00683         currentSeqNumber = newSeqNumber;
00684       }
00685       eventsRemainingInFile_ = rootFile_->eventTree().entries();
00686       if(eventsRemainingInFile_ == 0) {
00687         throw Exception(errors::NotFound) <<
00688            "RootInputFileSequence::readOneRandom(): Secondary Input file " << fileIter_->fileName() << " contains no events.\n";
00689       }
00690       rootFile_->setAtEventEntry(flatDistribution_->fireInt(eventsRemainingInFile_) - 1);
00691     }
00692     rootFile_->nextEventEntry();
00693 
00694     EventPrincipal* ep = rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal());
00695     if(ep == 0) {
00696       rootFile_->setAtEventEntry(0);
00697       ep = rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal());
00698       assert(ep != 0);
00699     }
00700     --eventsRemainingInFile_;
00701     return ep;
00702   }
00703 
00704   // bool RootFile::setEntryAtNextEventInLumi(RunNumber_t run, LuminosityBlockNumber_t lumi) {
00705 
00706   EventPrincipal*
00707   RootInputFileSequence::readOneRandomWithID(LuminosityBlockID const& id) {
00708     if(fileIterEnd_ == fileIterBegin_) {
00709       throw Exception(errors::Configuration) << "RootInputFileSequence::readOneRandomWithID(): no input files specified.\n";
00710     }
00711     if(!flatDistribution_) {
00712       Service<RandomNumberGenerator> rng;
00713       CLHEP::HepRandomEngine& engine = rng->getEngine();
00714       flatDistribution_.reset(new CLHEP::RandFlat(engine));
00715     }
00716     skipBadFiles_ = false;
00717     if(fileIter_ == fileIterEnd_ || !rootFile_ ||
00718         rootFile_->indexIntoFileIter().run() != id.run() || 
00719         rootFile_->indexIntoFileIter().lumi() != id.luminosityBlock()) {
00720       bool found = skipToItem(id.run(), id.luminosityBlock(), 0);
00721       if(!found) {
00722         return 0;
00723       }
00724       int eventsInLumi = 0;
00725       while(rootFile_->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock())) ++eventsInLumi;
00726       found = skipToItem(id.run(), id.luminosityBlock(), 0);
00727       assert(found);
00728       int eventInLumi = flatDistribution_->fireInt(eventsInLumi);
00729       for(int i = 0; i < eventInLumi; ++i) {
00730         bool found = rootFile_->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
00731         assert(found);
00732       }
00733     }
00734     bool nextFound = rootFile_->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock());
00735     EventPrincipal* ep = (nextFound ? rootFile_->clearAndReadCurrentEvent(rootFile_->secondaryEventPrincipal()) : 0);
00736     if(ep == 0) {
00737       bool found = rootFile_->setEntryAtItem(id.run(), id.luminosityBlock(), 0);
00738       if(found) {
00739         return readOneRandomWithID(id);
00740       }
00741     }
00742     return ep;
00743   }
00744 
00745   void
00746   RootInputFileSequence::fillDescription(ParameterSetDescription & desc) {
00747     desc.addUntracked<unsigned int>("skipEvents", 0U)
00748         ->setComment("Skip the first 'skipEvents' events that otherwise would have been processed.");
00749     desc.addUntracked<bool>("noEventSort", true)
00750         ->setComment("True:  Process runs, lumis and events in the order they appear in the file (but see notes 1 and 2).\n"
00751                      "False: Process runs, lumis and events in each file in numerical order (run#, lumi#, event#) (but see note 3).\n"
00752                      "Note 1: Events within the same lumi will always be processed contiguously.\n"
00753                      "Note 2: Lumis within the same run will always be processed contiguously.\n"
00754                      "Note 3: Any sorting occurs independently in each input file (no sorting across input files).");
00755     desc.addUntracked<bool>("skipBadFiles", false)
00756         ->setComment("True:  Ignore any missing or unopenable input file.\n"
00757                      "False: Throw exception if missing or unopenable input file.");
00758     desc.addUntracked<unsigned int>("cacheSize", roottree::defaultCacheSize)
00759         ->setComment("Size of ROOT TTree prefetch cache.  Affects performance.");
00760     desc.addUntracked<int>("treeMaxVirtualSize", -1)
00761         ->setComment("Size of ROOT TTree TBasket cache.  Affects performance.");
00762     desc.addUntracked<unsigned int>("setRunNumber", 0U)
00763         ->setComment("If non-zero, change number of first run to this number. Apply same offset to all runs.  Allowed only for simulation.");
00764     desc.addUntracked<bool>("dropDescendantsOfDroppedBranches", true)
00765         ->setComment("If True, also drop on input any descendent of any branch dropped on input.");
00766     std::string defaultString("permissive");
00767     desc.addUntracked<std::string>("parametersMustMatch", defaultString)
00768         ->setComment("'strict':     Values of tracked parameters must be unique across all input files.\n"
00769                      "'permissive': Values of tracked parameters may differ across or within files.");
00770     desc.addUntracked<std::string>("branchesMustMatch", defaultString)
00771         ->setComment("'strict':     Branches in each input file must match those in the first file.\n"
00772                      "'permissive': Branches in each input file may be any subset of those in the first file.");
00773     desc.addUntracked<bool>("labelRawDataLikeMC", true)
00774         ->setComment("If True: replace module label for raw data to match MC. Also use 'LHC' as process.");
00775 
00776     GroupSelectorRules::fillDescription(desc, "inputCommands");
00777     EventSkipperByID::fillDescription(desc);
00778     DuplicateChecker::fillDescription(desc);
00779   }
00780 
00781   ProcessingController::ForwardState
00782   RootInputFileSequence::forwardState() const {
00783     if(rootFile_) {
00784       if(!rootFile_->wasLastEventJustRead()) {
00785         return ProcessingController::kEventsAheadInFile;
00786       }
00787       std::vector<FileCatalogItem>::const_iterator itr(fileIter_);
00788       if(itr != fileIterEnd_) ++itr;
00789       if(itr != fileIterEnd_) {
00790         return ProcessingController::kNextFileExists;
00791       }
00792       return ProcessingController::kAtLastEvent;
00793     }
00794     return ProcessingController::kUnknownForward;
00795   }
00796 
00797   ProcessingController::ReverseState
00798   RootInputFileSequence::reverseState() const {
00799     if(rootFile_) {
00800       if(!rootFile_->wasFirstEventJustRead()) {
00801         return ProcessingController::kEventsBackwardsInFile;
00802       }
00803       if(fileIter_ != fileIterBegin_) {
00804         return ProcessingController::kPreviousFileExists;
00805       }
00806       return ProcessingController::kAtFirstEvent;
00807     }
00808     return ProcessingController::kUnknownReverse;
00809   }
00810 }