CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_13_patch3/src/IOPool/Input/src/DuplicateChecker.cc

Go to the documentation of this file.
00001 
00002 #include "IOPool/Input/src/DuplicateChecker.h"
00003 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00004 #include "FWCore/Utilities/interface/Exception.h"
00005 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00006 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
00007 
00008 #include <cassert>
00009 #include <algorithm>
00010 
00011 namespace edm {
00012 
00013   DuplicateChecker::DuplicateChecker(ParameterSet const& pset) :
00014     dataType_(unknown),
00015     itIsKnownTheFileHasNoDuplicates_(false),
00016     disabled_(false)
00017   {
00018     // The default value provided as the second argument to the getUntrackedParameter function call
00019     // is not used when the ParameterSet has been validated and the parameters are not optional
00020     // in the description.  This is currently true when PoolSource is the primary input source.
00021     // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
00022     // yet, so the ParameterSet does not get validated yet.  As soon as all the modules with a SecSource
00023     // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can
00024     // and should be deleted from the code.
00025     std::string duplicateCheckMode =
00026       pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
00027 
00028     if (duplicateCheckMode == std::string("noDuplicateCheck")) duplicateCheckMode_ = noDuplicateCheck;
00029     else if (duplicateCheckMode == std::string("checkEachFile")) duplicateCheckMode_ = checkEachFile;
00030     else if (duplicateCheckMode == std::string("checkEachRealDataFile")) duplicateCheckMode_ = checkEachRealDataFile;
00031     else if (duplicateCheckMode == std::string("checkAllFilesOpened")) duplicateCheckMode_ = checkAllFilesOpened;
00032     else {
00033       throw cms::Exception("Configuration")
00034         << "Illegal configuration parameter value passed to PoolSource for\n"
00035         << "the \"duplicateCheckMode\" parameter, legal values are:\n"
00036         << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
00037     }
00038   }
00039 
00040   void DuplicateChecker::disable() {
00041     disabled_ = true;
00042     dataType_ = unknown;
00043     relevantPreviousEvents_.clear();
00044     itIsKnownTheFileHasNoDuplicates_ = false;
00045   }
00046 
00047   void DuplicateChecker::inputFileOpened(
00048       bool realData,
00049       IndexIntoFile const& indexIntoFile,
00050       std::vector<boost::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
00051       std::vector<boost::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
00052 
00053     dataType_ = realData ? isRealData : isSimulation;
00054     if (checkDisabled()) return;
00055 
00056     relevantPreviousEvents_.clear();
00057     itIsKnownTheFileHasNoDuplicates_ = false;
00058 
00059     if (duplicateCheckMode_ == checkAllFilesOpened) {
00060 
00061       // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
00062       // One unintended thing, it also saves the duplicate runs and lumis.
00063       for(std::vector<boost::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
00064         if (indexesIntoFiles[i].get() != 0) {
00065 
00066           indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
00067         }
00068       }
00069     }
00070     if (relevantPreviousEvents_.empty()) {
00071       if(!indexIntoFile.containsDuplicateEvents()) {
00072         itIsKnownTheFileHasNoDuplicates_ = true;
00073       }
00074     }
00075   }
00076 
00077   void DuplicateChecker::inputFileClosed()
00078   {
00079     dataType_ = unknown;
00080     relevantPreviousEvents_.clear();
00081     itIsKnownTheFileHasNoDuplicates_ = false;
00082   }
00083 
00084   bool DuplicateChecker::isDuplicateAndCheckActive(int index,
00085                                                    RunNumber_t run,
00086                                                    LuminosityBlockNumber_t lumi,
00087                                                    EventNumber_t event,
00088                                                    std::string const& fileName) {
00089     if (itIsKnownTheFileHasNoDuplicates_) return false;
00090     if (checkDisabled()) return false;
00091 
00092     IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
00093     bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
00094 
00095     if (duplicate) {
00096       if (duplicateCheckMode_ == checkAllFilesOpened) {
00097         LogWarning("DuplicateEvent")
00098           << "Duplicate Events found in entire set of input files.\n"
00099           << "Both events were from run " << run 
00100           << " and luminosity block " << lumi
00101           << " with event number " << event << ".\n"
00102           << "The duplicate was from file " << fileName << ".\n"
00103           << "The duplicate will be skipped.\n";
00104       }
00105       else {
00106         LogWarning("DuplicateEvent")
00107           << "Duplicate Events found in file " << fileName << ".\n"
00108           << "Both events were from run " << run
00109           << " and luminosity block " << lumi
00110           << " with event number " << event << ".\n"
00111           << "The duplicate will be skipped.\n";
00112       }
00113       return true;
00114     }
00115     return false;
00116   }
00117 
00118   void
00119   DuplicateChecker::fillDescription(ParameterSetDescription & desc) {
00120     std::string defaultString("checkAllFilesOpened");
00121     desc.addUntracked<std::string>("duplicateCheckMode", defaultString)->setComment(
00122         "'checkAllFilesOpened':   check across all input files\n"
00123         "'checkEachFile':         check each input file independently\n"
00124         "'checkEachRealDataFile': check each real data input file independently\n"
00125         "'noDuplicateCheck':      no duplicate checking\n"
00126     );
00127   }
00128 }