CMS 3D CMS Logo

TrainProcessor.cc
Go to the documentation of this file.
1 #include <limits>
2 #include <string>
3 
4 #include <TH1.h>
5 
8 
10 
13 #include "PhysicsTools/MVAComputer/interface/ProcessRegistry.icc"
14 
16  "PhysicsToolsMVATrainer");
17 
18 namespace PhysicsTools {
19 
21  const AtomicId *id,
22  MVATrainer *trainer) :
23  Source(*id), name(name), trainer(trainer), monitoring(nullptr), monModule(nullptr)
24 {
25 }
26 
28 {
29 }
30 
// Body of the begin-of-training hook. NOTE(review): the signature line and
// several interior lines (listing numbers 31, 40, 56, 74-75) are absent from
// this listing, so the comments below describe only what is visible.
//
// Visible behaviour: when no `monitoring` module is attached yet, monitor
// modules are booked through the trainer; if that yields a non-null
// `monModule`, one signal/background histogram pair is booked per input
// variable and appended to `monHistos`. trainBegin() is always called last.
32 {
33  bool booked = false;
// default number of histogram bins; raised to 400 for the "output" monitor below
34  unsigned int nBins = 50;
35 
36  if (!monitoring) {
37  const char *source = getName();
38  if (source) {
39  monitoring = trainer->bookMonitor(name + "_" + source);
// NOTE(review): the start of the statement completed by the next line is
// missing from this listing.
41  source);
42  } else {
43  monModule = trainer->bookMonitor("output");
44  nBins = 400;
45  }
46 
// histograms are only booked when a monitor module is actually available
47  booked = monModule != nullptr;
48  }
49 
50  if (booked) {
51  std::vector<SourceVariable*> inputs = getInputs().get();
52  for(std::vector<SourceVariable*>::const_iterator iter =
53  inputs.begin(); iter != inputs.end(); ++iter) {
54 
55  SourceVariable *var = *iter;
// NOTE(review): the left-hand side of this expression is missing from the
// listing; the visible part concatenates "<source name>_<variable name>".
57  (const char*)var->getSource()->getName()
58  + std::string("_")
59  + (const char*)var->getName();
60 
// index 0 of every per-class array is background, index 1 is signal
61  SigBkg pair;
62  pair.entries[0] = pair.entries[1] = 0;
// both histograms are booked with axis limits (0, 0)
// NOTE(review): presumably this requests automatic axis range — confirm
63  pair.histo[0] = monModule->book<TH1F>(name + "_bkg",
64  (name + "_bkg").c_str(),
65  (name + " background").c_str(), nBins, 0, 0);
66  pair.histo[1] = monModule->book<TH1F>(name + "_sig",
67  (name + "_sig").c_str(),
68  (name + " signal").c_str(), nBins, 0, 0);
69  pair.underflow[0] = pair.underflow[1] = 0.0;
70  pair.overflow[0] = pair.overflow[1] = 0.0;
71 
72  pair.sameBinning = true; // use as default
// NOTE(review): the body of this branch is missing from the listing.
73  if (monitoring) {
76  } else {
// values <= min or >= max are counted as under-/overflow in doTrainData()
77  pair.min = -99999.0;
78  pair.max = +99999.0;
79  }
80 
81  monHistos.push_back(pair);
82  }
83  }
84 
85  trainBegin();
86 }
87 
88 void TrainProcessor::doTrainData(const std::vector<double> *values,
89  bool target, double weight,
90  bool train, bool test)
91 {
92  if (monModule && test) {
93  for(std::vector<SigBkg>::iterator iter = monHistos.begin();
94  iter != monHistos.end(); ++iter) {
95  const std::vector<double> &vals =
96  values[iter - monHistos.begin()];
97  for(std::vector<double>::const_iterator value =
98  vals.begin(); value != vals.end(); ++value) {
99 
100  iter->entries[target]++;
101 
102  if (*value <= iter->min) {
103  iter->underflow[target] += weight;
104  continue;
105  } else if (*value >= iter->max) {
106  iter->overflow[target] += weight;
107  continue;
108  }
109 
110  iter->histo[target]->Fill(*value, weight);
111 
112  if (iter->sameBinning)
113  iter->histo[!target]->Fill(*value, 0);
114  }
115  }
116  }
117 
118  if (train)
119  trainData(values, target, weight);
120  if (test)
121  testData(values, target, weight, train);
122 }
123 
// Body of the end-of-training hook. NOTE(review): the signature line is
// absent from this listing.
//
// Calls trainEnd() first. If a monitor module is attached, the under-/overflow
// weights and entry counts accumulated in monHistos are then written into the
// histograms, and monModule is reset.
125 {
126  trainEnd();
127 
128  if (monModule) {
129  for(std::vector<SigBkg>::const_iterator iter =
130  monHistos.begin(); iter != monHistos.end(); ++iter) {
131 
// i = 0 and i = 1 address the two per-class slots of each SigBkg
132  for(unsigned int i = 0; i < 2; i++) {
// bin 0 receives the underflow weight, bin GetNbinsX() + 1 the overflow weight
133  Int_t oBin = iter->histo[i]->GetNbinsX() + 1;
134  iter->histo[i]->SetBinContent(0,
135  iter->histo[i]->GetBinContent(0) +
136  iter->underflow[i]);
137  iter->histo[i]->SetBinContent(oBin,
138  iter->histo[i]->GetBinContent(oBin) +
139  iter->overflow[i]);
// replace the histogram's entry count with the counter kept in entries[]
140  iter->histo[i]->SetEntries(iter->entries[i]);
141  }
142  }
143 
// with monModule cleared, this block and the monitoring part of doTrainData()
// are skipped from now on
144  monModule = nullptr;
145  }
146 }
147 
// Explicit specialisation of a `create` function returning a TrainProcessor*.
// NOTE(review): part of the declaration (listing numbers 149-150) is absent
// from this listing.
//
// Looks the processor up via ProcessRegistry::create(). If that yields a null
// pointer, it asks TrainProcessor::PluginFactory for "TrainProcessor/<name>"
// and repeats the lookup. Returns the result of the last lookup, which may
// still be null.
148 template<>
151  const char *name, const AtomicId *id, MVATrainer *trainer)
152 {
153  TrainProcessor *result = ProcessRegistry::create(name, id, trainer);
154  if (!result) {
155  // try to load the shared library and retry
156  try {
// tmp only owns the factory-created object until the end of the try block
// NOTE(review): presumably loading the plugin registers the processor as a
// side effect, which is why the lookup is repeated — confirm
157  std::unique_ptr<PhysicsTools::TrainProcessor::Dummy> tmp{TrainProcessor::PluginFactory::get()->create(
158  std::string("TrainProcessor/") + name)};
159  result = ProcessRegistry::create(name, id, trainer);
// a cms::Exception raised inside the try block is swallowed here; result
// keeps whatever value it had (null when the factory call threw)
160  } catch(const cms::Exception &e) {
161  // caller will have to deal with the null pointer
162  // in principle this will just give a slightly more
163  // descriptive error message (and will rethrow anyhow)
164  }
165  }
166  return result;
167 }
168 
169 } // namespace PhysicsTools
Source * getSource() const
#define EDM_REGISTER_PLUGINFACTORY(_factory_, _category_)
Definition: PluginFactory.h:86
def create(alignables, pedeDump, additionalData, outputFile, config)
static Base_t * create(const char *name, const CalibBase_t *calib, Parent_t *parent)
create an instance of name, given a calibration calib and parent parent
#define nullptr
static void unregisterProcess(const char *name)
static void registerProcess(const char *name, const ProcessRegistry *process)
Definition: weight.py:1
TrainProcessor(const char *name, const AtomicId *id, MVATrainer *trainer)
const SourceVariableSet & getInputs() const
Definition: Source.h:26
Cheap generic unique keyword identifier class.
Definition: AtomicId.h:31
const AtomicId getName() const
Definition: Variable.h:143
virtual void testData(const std::vector< double > *values, bool target, double weight, bool trainedOn)
virtual void trainData(const std::vector< double > *values, bool target, double weight)
const double infinity
std::vector< SourceVariable * > get(bool withMagic=false) const
Definition: value.py:1
T min(T a, T b)
Definition: MathUtil.h:58
SourceVariableSet inputs
Definition: Source.h:39
void book(const std::string &name, T *object)
TrainerMonitoring::Module * bookMonitor(const std::string &name)
Definition: MVATrainer.cc:708
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
AtomicId getName() const
Definition: Source.h:19
std::vector< SigBkg > monHistos
Generic registry template for polymorphic processor implementations.
static std::string const source
Definition: EdmProvDump.cc:47
void doTrainData(const std::vector< double > *values, bool target, double weight, bool train, bool test)
T get(const Candidate &c)
Definition: component.h:55