00001 00002 #include "IOPool/Input/src/DuplicateChecker.h" 00003 #include "FWCore/ParameterSet/interface/ParameterSet.h" 00004 #include "FWCore/Utilities/interface/Exception.h" 00005 #include "FWCore/MessageLogger/interface/MessageLogger.h" 00006 #include "DataFormats/Provenance/interface/FileIndex.h" 00007 00008 #include <cassert> 00009 00010 namespace edm { 00011 00012 DuplicateChecker::DuplicateChecker(ParameterSet const& pset) : 00013 00014 duplicateCheckMode_(checkEachRealDataFile), 00015 dataType_(unknown), 00016 eventIDs_(), 00017 itIsKnownTheFileHasNoDuplicates_(false) 00018 { 00019 std::string duplicateCheckMode = 00020 pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkEachRealDataFile")); 00021 00022 if (duplicateCheckMode == std::string("noDuplicateCheck")) duplicateCheckMode_ = noDuplicateCheck; 00023 else if (duplicateCheckMode == std::string("checkEachFile")) duplicateCheckMode_ = checkEachFile; 00024 else if (duplicateCheckMode == std::string("checkEachRealDataFile")) duplicateCheckMode_ = checkEachRealDataFile; 00025 else if (duplicateCheckMode == std::string("checkAllFilesOpened")) duplicateCheckMode_ = checkAllFilesOpened; 00026 else { 00027 throw cms::Exception("Configuration") 00028 << "Illegal configuration parameter value passed to PoolSource for\n" 00029 << "the \"duplicateCheckMode\" parameter, legal values are:\n" 00030 << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n"; 00031 } 00032 } 00033 00034 void DuplicateChecker::init(bool realData, 00035 FileIndex const& fileIndex) 00036 { 00037 if (duplicateCheckMode_ == noDuplicateCheck) return; 00038 if (duplicateCheckMode_ == checkAllFilesOpened) return; 00039 00040 assert(dataType_ == unknown); 00041 dataType_ = realData ? isRealData : isSimulation; 00042 00043 if (duplicateCheckMode_ == checkEachFile || 00044 (duplicateCheckMode_ == checkEachRealDataFile && dataType_ == isRealData)) { 00045 itIsKnownTheFileHasNoDuplicates_ = fileIndex.eventsUniqueAndOrdered(); 00046 } 00047 } 00048 00049 void DuplicateChecker::inputFileClosed() 00050 { 00051 if (duplicateCheckMode_ == noDuplicateCheck) return; 00052 if (duplicateCheckMode_ == checkAllFilesOpened) return; 00053 00054 dataType_ = unknown; 00055 eventIDs_.clear(); 00056 itIsKnownTheFileHasNoDuplicates_ = false; 00057 } 00058 00059 void DuplicateChecker::rewind() 00060 { 00061 eventIDs_.clear(); 00062 } 00063 00064 bool DuplicateChecker::isDuplicateAndCheckActive(EventID const& eventID, 00065 LuminosityBlockNumber_t const& lumi, 00066 std::string const& fileName) 00067 { 00068 if (duplicateCheckMode_ == noDuplicateCheck) return false; 00069 if (duplicateCheckMode_ == checkEachRealDataFile && dataType_ == isSimulation) return false; 00070 00071 if (duplicateCheckMode_ == checkEachFile || 00072 duplicateCheckMode_ == checkEachRealDataFile) { 00073 assert(dataType_ != unknown); 00074 if (itIsKnownTheFileHasNoDuplicates_) return false; 00075 } 00076 00077 bool duplicate = !eventIDs_.insert(eventID).second; 00078 00079 if (duplicate) { 00080 if (duplicateCheckMode_ == checkAllFilesOpened) { 00081 LogWarning("DuplicateEvent") 00082 << "Duplicate Events found in entire set of input files.\n" 00083 << "Both events were from run " << eventID.run() 00084 << " with event number " << eventID.event() << ".\n" 00085 << "The duplicate had luminosity block number " << lumi 00086 << " and was from file " << fileName << ".\n" 00087 << "The duplicate will be skipped.\n"; 00088 } 00089 else { 00090 LogWarning("DuplicateEvent") 00091 << "Duplicate Events found in file " << fileName << ".\n" 00092 << "Both events were from run " << eventID.run() 00093 << " with event number " << eventID.event() << ".\n" 00094 << "The duplicate had luminosity block number " << lumi << ".\n" 00095 << "The duplicate will be skipped.\n"; 00096 } 00097 return true; 00098 } 00099 return false; 00100 } 00101 }