CMS 3D CMS Logo

DuplicateChecker.cc
Go to the documentation of this file.
1 
7 
8 #include <cassert>
9 #include <algorithm>
10 
11 namespace edm {
12 
14  dataType_(unknown),
15  itIsKnownTheFileHasNoDuplicates_(false),
16  disabled_(false)
17  {
18  // The default value provided as the second argument to the getUntrackedParameter function call
19  // is not used when the ParameterSet has been validated and the parameters are not optional
20  // in the description. This is currently true when PoolSource is the primary input source.
21  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
22  // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource
23  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
24  // and should be deleted from the code.
26  pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
27 
28  if (duplicateCheckMode == std::string("noDuplicateCheck")) duplicateCheckMode_ = noDuplicateCheck;
29  else if (duplicateCheckMode == std::string("checkEachFile")) duplicateCheckMode_ = checkEachFile;
30  else if (duplicateCheckMode == std::string("checkEachRealDataFile")) duplicateCheckMode_ = checkEachRealDataFile;
31  else if (duplicateCheckMode == std::string("checkAllFilesOpened")) duplicateCheckMode_ = checkAllFilesOpened;
32  else {
33  throw cms::Exception("Configuration")
34  << "Illegal configuration parameter value passed to PoolSource for\n"
35  << "the \"duplicateCheckMode\" parameter, legal values are:\n"
36  << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
37  }
38  }
39 
41  disabled_ = true;
45  }
46 
48  bool realData,
49  IndexIntoFile const& indexIntoFile,
50  std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
51  std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
52 
53  dataType_ = realData ? isRealData : isSimulation;
54  if (checkDisabled()) return;
55 
58 
60 
61  // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
62  // An unintended side effect: it also saves the duplicate runs and lumis.
63  for(std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
64  if (indexesIntoFiles[i].get() != nullptr) {
65 
66  indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
67  }
68  }
69  }
70  if (relevantPreviousEvents_.empty()) {
71  if(!indexIntoFile.containsDuplicateEvents()) {
73  }
74  }
75  }
76 
78  {
82  }
83 
88  std::string const& fileName) {
89  if (itIsKnownTheFileHasNoDuplicates_) return false;
90  if (checkDisabled()) return false;
91 
92  IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
93  bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
94 
95  if (duplicate) {
97  LogWarning("DuplicateEvent")
98  << "Duplicate Events found in entire set of input files.\n"
99  << "Both events were from run " << run
100  << " and luminosity block " << lumi
101  << " with event number " << event << ".\n"
102  << "The duplicate was from file " << fileName << ".\n"
103  << "The duplicate will be skipped.\n";
104  }
105  else {
106  LogWarning("DuplicateEvent")
107  << "Duplicate Events found in file " << fileName << ".\n"
108  << "Both events were from run " << run
109  << " and luminosity block " << lumi
110  << " with event number " << event << ".\n"
111  << "The duplicate will be skipped.\n";
112  }
113  return true;
114  }
115  return false;
116  }
117 
118  void
120  std::string defaultString("checkAllFilesOpened");
121  desc.addUntracked<std::string>("duplicateCheckMode", defaultString)->setComment(
122  "'checkAllFilesOpened': check across all input files\n"
123  "'checkEachFile': check each input file independently\n"
124  "'checkEachRealDataFile': check each real data input file independently\n"
125  "'noDuplicateCheck': no duplicate checking\n"
126  );
127  }
128 }
T getUntrackedParameter(std::string const &, T const &) const
bool checkDisabled() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
unsigned long long EventNumber_t
static void fillDescription(ParameterSetDescription &desc)
bool isDuplicateAndCheckActive(int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const &fileName)
uint16_t size_type
unsigned int LuminosityBlockNumber_t
void set_intersection(IndexIntoFile const &indexIntoFile, std::set< IndexRunLumiEventKey > &intersection) const
bool containsDuplicateEvents() const
Returns true if the IndexIntoFile contains 2 events with the same ProcessHistoryID index...
HLT enums.
std::set< IndexIntoFile::IndexRunLumiEventKey > relevantPreviousEvents_
unsigned int RunNumber_t
DuplicateChecker(ParameterSet const &pset)
void inputFileOpened(bool realData, IndexIntoFile const &indexIntoFile, std::vector< std::shared_ptr< IndexIntoFile > > const &indexesIntoFiles, std::vector< std::shared_ptr< IndexIntoFile > >::size_type currentIndexIntoFile)
DuplicateCheckMode duplicateCheckMode_
Definition: event.py:1