CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
DuplicateChecker.cc
Go to the documentation of this file.
1 
7 
8 #include <cassert>
9 #include <algorithm>
10 
11 namespace edm {
12 
14  dataType_(unknown),
15  itIsKnownTheFileHasNoDuplicates_(false),
16  disabled_(false)
17  {
18  // The default value provided as the second argument to the getUntrackedParameter function call
19  // is not used when the ParameterSet has been validated and the parameters are not optional
20  // in the description. This is currently true when PoolSource is the primary input source.
21  // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
22  // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource
23  // have defined descriptions, the defaults in the getUntrackedParameter function calls can
24  // and should be deleted from the code.
26  pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
27 
28  if (duplicateCheckMode == std::string("noDuplicateCheck")) duplicateCheckMode_ = noDuplicateCheck;
29  else if (duplicateCheckMode == std::string("checkEachFile")) duplicateCheckMode_ = checkEachFile;
30  else if (duplicateCheckMode == std::string("checkEachRealDataFile")) duplicateCheckMode_ = checkEachRealDataFile;
31  else if (duplicateCheckMode == std::string("checkAllFilesOpened")) duplicateCheckMode_ = checkAllFilesOpened;
32  else {
33  throw cms::Exception("Configuration")
34  << "Illegal configuration parameter value passed to PoolSource for\n"
35  << "the \"duplicateCheckMode\" parameter, legal values are:\n"
36  << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
37  }
38  }
39 
41  disabled_ = true;
45  }
46 
48  bool realData,
49  IndexIntoFile const& indexIntoFile,
50  std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
51  std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
52 
53  dataType_ = realData ? isRealData : isSimulation;
54  if (checkDisabled()) return;
55 
58 
60 
61  // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
62  // An unintended side effect is that it also saves the duplicate runs and lumis.
63  for(std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
64  if (indexesIntoFiles[i].get() != 0) {
65 
66  indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
67  }
68  }
69  }
70  if (relevantPreviousEvents_.empty()) {
71  if(!indexIntoFile.containsDuplicateEvents()) {
73  }
74  }
75  }
76 
78  {
82  }
83 
88  std::string const& fileName) {
89  if (itIsKnownTheFileHasNoDuplicates_) return false;
90  if (checkDisabled()) return false;
91 
92  IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
93  bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
94 
95  if (duplicate) {
97  LogWarning("DuplicateEvent")
98  << "Duplicate Events found in entire set of input files.\n"
99  << "Both events were from run " << run
100  << " and luminosity block " << lumi
101  << " with event number " << event << ".\n"
102  << "The duplicate was from file " << fileName << ".\n"
103  << "The duplicate will be skipped.\n";
104  }
105  else {
106  LogWarning("DuplicateEvent")
107  << "Duplicate Events found in file " << fileName << ".\n"
108  << "Both events were from run " << run
109  << " and luminosity block " << lumi
110  << " with event number " << event << ".\n"
111  << "The duplicate will be skipped.\n";
112  }
113  return true;
114  }
115  return false;
116  }
117 
118  void
120  std::string defaultString("checkAllFilesOpened");
121  desc.addUntracked<std::string>("duplicateCheckMode", defaultString)->setComment(
122  "'checkAllFilesOpened': check across all input files\n"
123  "'checkEachFile': check each input file independently\n"
124  "'checkEachRealDataFile': check each real data input file independently\n"
125  "'noDuplicateCheck': no duplicate checking\n"
126  );
127  }
128 }
T getUntrackedParameter(std::string const &, T const &) const
int i
Definition: DBlmapReader.cc:9
bool checkDisabled() const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
tuple lumi
Definition: fjr2json.py:35
unsigned long long EventNumber_t
static void fillDescription(ParameterSetDescription &desc)
bool isDuplicateAndCheckActive(int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const &fileName)
uint16_t size_type
unsigned int LuminosityBlockNumber_t
static const boost::regex duplicate("duplicateIOV[[:print:]]+?[S|s]ince[=| ]([[:alnum:]]+?);.*")
void set_intersection(IndexIntoFile const &indexIntoFile, std::set< IndexRunLumiEventKey > &intersection) const
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger but the state exists so we define the behavior If all triggers are the negative criterion will lead to accepting the event (this again matches the behavior of "!*" before the partial wildcard feature was incorporated). The per-event "cost" of each negative criterion with multiple relevant triggers is about the same as !* was in the past
bool containsDuplicateEvents() const
Returns true if the IndexIntoFile contains 2 events with the same ProcessHistoryID index...
std::set< IndexIntoFile::IndexRunLumiEventKey > relevantPreviousEvents_
unsigned int RunNumber_t
volatile std::atomic< bool > shutdown_flag false
DuplicateChecker(ParameterSet const &pset)
void inputFileOpened(bool realData, IndexIntoFile const &indexIntoFile, std::vector< std::shared_ptr< IndexIntoFile > > const &indexesIntoFiles, std::vector< std::shared_ptr< IndexIntoFile > >::size_type currentIndexIntoFile)
DuplicateCheckMode duplicateCheckMode_