CMS 3D CMS Logo

DuplicateChecker.cc
Go to the documentation of this file.
1 
7 
8 #include <cassert>
9 #include <algorithm>
10 
11 namespace edm {
12 
14  : dataType_(unknown), itIsKnownTheFileHasNoDuplicates_(false), disabled_(false) {
15  // The default value provided as the second argument to the getUntrackedParameter function call
16  // is not used when the ParameterSet has been validated and the parameters are not optional
17  // in the description. This is currently true when PoolSource is the primary input source.
18  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
19  // yet, so their ParameterSet does not get validated. As soon as all the modules with a SecSource
20  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
21  // and should be deleted from the code.
23  pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
24 
25  if (duplicateCheckMode == std::string("noDuplicateCheck"))
27  else if (duplicateCheckMode == std::string("checkEachFile"))
29  else if (duplicateCheckMode == std::string("checkEachRealDataFile"))
31  else if (duplicateCheckMode == std::string("checkAllFilesOpened"))
33  else {
34  throw cms::Exception("Configuration")
35  << "Illegal configuration parameter value passed to PoolSource for\n"
36  << "the \"duplicateCheckMode\" parameter, legal values are:\n"
37  << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
38  }
39  }
40 
42  disabled_ = true;
46  }
47 
49  IndexIntoFile const& indexIntoFile,
50  std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
51  std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
52  dataType_ = realData ? isRealData : isSimulation;
53  if (checkDisabled())
54  return;
55 
58 
60  // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
61  // An unintended side effect: it also saves the duplicate runs and lumis.
62  for (std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
63  if (indexesIntoFiles[i].get() != nullptr) {
64  indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
65  }
66  }
67  }
68  if (relevantPreviousEvents_.empty()) {
69  if (!indexIntoFile.containsDuplicateEvents()) {
71  }
72  }
73  }
74 
79  }
80 
84  return false;
85  if (checkDisabled())
86  return false;
87 
89  bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
90 
91  if (duplicate) {
93  LogWarning("DuplicateEvent") << "Duplicate Events found in entire set of input files.\n"
94  << "Both events were from run " << run << " and luminosity block " << lumi
95  << " with event number " << event << ".\n"
96  << "The duplicate was from file " << fileName << ".\n"
97  << "The duplicate will be skipped.\n";
98  } else {
99  LogWarning("DuplicateEvent") << "Duplicate Events found in file " << fileName << ".\n"
100  << "Both events were from run " << run << " and luminosity block " << lumi
101  << " with event number " << event << ".\n"
102  << "The duplicate will be skipped.\n";
103  }
104  return true;
105  }
106  return false;
107  }
108 
110  std::string defaultString("checkAllFilesOpened");
111  desc.addUntracked<std::string>("duplicateCheckMode", defaultString)
112  ->setComment(
113  "'checkAllFilesOpened': check across all input files\n"
114  "'checkEachFile': check each input file independently\n"
115  "'checkEachRealDataFile': check each real data input file independently\n"
116  "'noDuplicateCheck': no duplicate checking\n");
117  }
118 } // namespace edm
edm::RunNumber_t
unsigned int RunNumber_t
Definition: RunLumiEventNumber.h:14
mps_fire.i
i
Definition: mps_fire.py:355
DuplicateChecker.h
edm::DuplicateChecker::disable
void disable()
Definition: DuplicateChecker.cc:41
MessageLogger.h
funct::false
false
Definition: Factorize.h:34
edm::DuplicateChecker::inputFileClosed
void inputFileClosed()
Definition: DuplicateChecker.cc:75
edm::DuplicateChecker::duplicateCheckMode_
DuplicateCheckMode duplicateCheckMode_
Definition: DuplicateChecker.h:62
edm::DuplicateChecker::itIsKnownTheFileHasNoDuplicates_
bool itIsKnownTheFileHasNoDuplicates_
Definition: DuplicateChecker.h:75
edm::DuplicateChecker::isDuplicateAndCheckActive
bool isDuplicateAndCheckActive(int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const &fileName)
Definition: DuplicateChecker.cc:81
edm::DuplicateChecker::isRealData
Definition: DuplicateChecker.h:64
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::DuplicateChecker::checkEachRealDataFile
Definition: DuplicateChecker.h:60
edm::IndexIntoFile::set_intersection
void set_intersection(IndexIntoFile const &indexIntoFile, std::set< IndexRunLumiEventKey > &intersection) const
Definition: IndexIntoFile.cc:716
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
edm::IndexIntoFile
Definition: IndexIntoFile.h:225
edm::DuplicateChecker::fillDescription
static void fillDescription(ParameterSetDescription &desc)
Definition: DuplicateChecker.cc:109
EcnaPython_AdcPeg12_S1_10_R170298_1_0_150_Dee0.duplicateCheckMode
duplicateCheckMode
Definition: EcnaPython_AdcPeg12_S1_10_R170298_1_0_150_Dee0.py:23
MillePedeFileConverter_cfg.fileName
fileName
Definition: MillePedeFileConverter_cfg.py:32
edm::DuplicateChecker::checkEachFile
Definition: DuplicateChecker.h:60
edm::LuminosityBlockNumber_t
unsigned int LuminosityBlockNumber_t
Definition: RunLumiEventNumber.h:13
edm::IndexIntoFile::IndexRunLumiEventKey
Definition: IndexIntoFile.h:869
edm::DuplicateChecker::disabled_
bool disabled_
Definition: DuplicateChecker.h:77
trigger::size_type
uint16_t size_type
Definition: TriggerTypeDefs.h:18
edm::DuplicateChecker::checkDisabled
bool checkDisabled() const
Definition: DuplicateChecker.h:45
edm::DuplicateChecker::checkAllFilesOpened
Definition: DuplicateChecker.h:60
edm::DuplicateChecker::dataType_
DataType dataType_
Definition: DuplicateChecker.h:66
edm::EventNumber_t
unsigned long long EventNumber_t
Definition: RunLumiEventNumber.h:12
ParameterSetDescription.h
AlCaHLTBitMon_QueryRunRegistry.string
string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
edm::LogWarning
Definition: MessageLogger.h:141
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:100
edm::ParameterSet
Definition: ParameterSet.h:36
edm::IndexIntoFile::containsDuplicateEvents
bool containsDuplicateEvents() const
Returns true if the IndexIntoFile contains 2 events with the same ProcessHistoryID index,...
Definition: IndexIntoFile.cc:818
edm::get
T const & get(Event const &event, InputTag const &tag) noexcept(false)
Definition: Event.h:669
writedatasetfile.run
run
Definition: writedatasetfile.py:27
edm::DuplicateChecker::unknown
Definition: DuplicateChecker.h:64
edm::DuplicateChecker::isSimulation
Definition: DuplicateChecker.h:64
edm::DuplicateChecker::inputFileOpened
void inputFileOpened(bool realData, IndexIntoFile const &indexIntoFile, std::vector< std::shared_ptr< IndexIntoFile > > const &indexesIntoFiles, std::vector< std::shared_ptr< IndexIntoFile > >::size_type currentIndexIntoFile)
Definition: DuplicateChecker.cc:48
Exception
Definition: hltDiff.cc:246
Exception.h
AlignmentPI::index
index
Definition: AlignmentPayloadInspectorHelper.h:46
edm::DuplicateChecker::relevantPreviousEvents_
std::set< IndexIntoFile::IndexRunLumiEventKey > relevantPreviousEvents_
Definition: DuplicateChecker.h:73
ParameterSet.h
event
Definition: event.py:1
lumi
Definition: LumiSectionData.h:20
edm::DuplicateChecker::noDuplicateCheck
Definition: DuplicateChecker.h:60
edm::DuplicateChecker::DuplicateChecker
DuplicateChecker(ParameterSet const &pset)
Definition: DuplicateChecker.cc:13
muonDTDigis_cfi.pset
pset
Definition: muonDTDigis_cfi.py:27
susybsm::HSCParticleType::unknown
Definition: HSCParticle.h:20