CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
DuplicateChecker.cc
Go to the documentation of this file.
1 
7 
8 #include <cassert>
9 #include <algorithm>
10 
11 namespace edm {
12 
14  : dataType_(unknown), itIsKnownTheFileHasNoDuplicates_(false), disabled_(false) {
15  // The default value provided as the second argument to the getUntrackedParameter function call
16  // is not used when the ParameterSet has been validated and the parameters are not optional
17  // in the description. This is currently true when PoolSource is the primary input source.
18  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
19  // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource
20  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
21  // and should be deleted from the code.
23  pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
24 
25  if (duplicateCheckMode == std::string("noDuplicateCheck"))
27  else if (duplicateCheckMode == std::string("checkEachFile"))
29  else if (duplicateCheckMode == std::string("checkEachRealDataFile"))
31  else if (duplicateCheckMode == std::string("checkAllFilesOpened"))
33  else {
34  throw cms::Exception("Configuration")
35  << "Illegal configuration parameter value passed to PoolSource for\n"
36  << "the \"duplicateCheckMode\" parameter, legal values are:\n"
37  << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
38  }
39  }
40 
42  disabled_ = true;
46  }
47 
49  IndexIntoFile const& indexIntoFile,
50  std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
51  std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
52  dataType_ = realData ? isRealData : isSimulation;
53  if (checkDisabled())
54  return;
55 
58 
60  // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
61  // An unintended side effect is that it also saves the duplicate runs and lumis.
62  for (std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
63  if (indexesIntoFiles[i].get() != nullptr) {
64  indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
65  }
66  }
67  }
68  if (relevantPreviousEvents_.empty()) {
69  if (!indexIntoFile.containsDuplicateEvents()) {
71  }
72  }
73  }
74 
79  }
80 
84  return false;
85  if (checkDisabled())
86  return false;
87 
88  IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
89  bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
90 
91  if (duplicate) {
93  LogWarning("DuplicateEvent") << "Duplicate Events found in entire set of input files.\n"
94  << "Both events were from run " << run << " and luminosity block " << lumi
95  << " with event number " << event << ".\n"
96  << "The duplicate was from file " << fileName << ".\n"
97  << "The duplicate will be skipped.\n";
98  } else {
99  LogWarning("DuplicateEvent") << "Duplicate Events found in file " << fileName << ".\n"
100  << "Both events were from run " << run << " and luminosity block " << lumi
101  << " with event number " << event << ".\n"
102  << "The duplicate will be skipped.\n";
103  }
104  return true;
105  }
106  return false;
107  }
108 
110  std::string defaultString("checkAllFilesOpened");
111  desc.addUntracked<std::string>("duplicateCheckMode", defaultString)
112  ->setComment(
113  "'checkAllFilesOpened': check across all input files\n"
114  "'checkEachFile': check each input file independently\n"
115  "'checkEachRealDataFile': check each real data input file independently\n"
116  "'noDuplicateCheck': no duplicate checking\n");
117  }
118 } // namespace edm
T getUntrackedParameter(std::string const &, T const &) const
bool checkDisabled() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
unsigned long long EventNumber_t
static void fillDescription(ParameterSetDescription &desc)
bool isDuplicateAndCheckActive(int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const &fileName)
uint16_t size_type
unsigned int LuminosityBlockNumber_t
void set_intersection(IndexIntoFile const &indexIntoFile, std::set< IndexRunLumiEventKey > &intersection) const
list lumi
Definition: dqmdumpme.py:53
bool containsDuplicateEvents() const
Returns true if the IndexIntoFile contains 2 events with the same ProcessHistoryID index...
std::set< IndexIntoFile::IndexRunLumiEventKey > relevantPreviousEvents_
unsigned int RunNumber_t
Log< level::Warning, false > LogWarning
DuplicateChecker(ParameterSet const &pset)
void inputFileOpened(bool realData, IndexIntoFile const &indexIntoFile, std::vector< std::shared_ptr< IndexIntoFile > > const &indexesIntoFiles, std::vector< std::shared_ptr< IndexIntoFile > >::size_type currentIndexIntoFile)
DuplicateCheckMode duplicateCheckMode_