CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
TrainProcessor.cc
Go to the documentation of this file.
1 #include <limits>
2 #include <string>
3 
4 #include <TH1.h>
5 
8 
10 
13 
15  "PhysicsToolsMVATrainer");
16 
17 namespace PhysicsTools {
18 
20  const AtomicId *id,
21  MVATrainer *trainer) :
22  Source(*id), name(name), trainer(trainer), monitoring(0), monModule(0)
23 {
24 }
25 
27 {
28 }
29 
31 {
32  bool booked = false;
33  unsigned int nBins = 50;
34 
35  if (!monitoring) {
36  const char *source = getName();
37  if (source) {
38  monitoring = trainer->bookMonitor(name + "_" + source);
39  monModule = trainer->bookMonitor(std::string("input_") +
40  source);
41  } else {
42  monModule = trainer->bookMonitor("output");
43  nBins = 400;
44  }
45 
46  booked = monModule != 0;
47  }
48 
49  if (booked) {
50  std::vector<SourceVariable*> inputs = getInputs().get();
51  for(std::vector<SourceVariable*>::const_iterator iter =
52  inputs.begin(); iter != inputs.end(); ++iter) {
53 
54  SourceVariable *var = *iter;
55  std::string name =
56  (const char*)var->getSource()->getName()
57  + std::string("_")
58  + (const char*)var->getName();
59 
60  SigBkg pair;
61  pair.entries[0] = pair.entries[1] = 0;
62  pair.histo[0] = monModule->book<TH1F>(name + "_bkg",
63  (name + "_bkg").c_str(),
64  (name + " background").c_str(), nBins, 0, 0);
65  pair.histo[1] = monModule->book<TH1F>(name + "_sig",
66  (name + "_sig").c_str(),
67  (name + " signal").c_str(), nBins, 0, 0);
68  pair.underflow[0] = pair.underflow[1] = 0.0;
69  pair.overflow[0] = pair.overflow[1] = 0.0;
70 
71  pair.sameBinning = true; // use as default
72  if (monitoring) {
75  } else {
76  pair.min = -99999.0;
77  pair.max = +99999.0;
78  }
79 
80  monHistos.push_back(pair);
81  }
82  }
83 
84  trainBegin();
85 }
86 
87 void TrainProcessor::doTrainData(const std::vector<double> *values,
88  bool target, double weight,
89  bool train, bool test)
90 {
91  if (monModule && test) {
92  for(std::vector<SigBkg>::iterator iter = monHistos.begin();
93  iter != monHistos.end(); ++iter) {
94  const std::vector<double> &vals =
95  values[iter - monHistos.begin()];
96  for(std::vector<double>::const_iterator value =
97  vals.begin(); value != vals.end(); ++value) {
98 
99  iter->entries[target]++;
100 
101  if (*value <= iter->min) {
102  iter->underflow[target] += weight;
103  continue;
104  } else if (*value >= iter->max) {
105  iter->overflow[target] += weight;
106  continue;
107  }
108 
109  iter->histo[target]->Fill(*value, weight);
110 
111  if (iter->sameBinning)
112  iter->histo[!target]->Fill(*value, 0);
113  }
114  }
115  }
116 
117  if (train)
118  trainData(values, target, weight);
119  if (test)
120  testData(values, target, weight, train);
121 }
122 
124 {
125  trainEnd();
126 
127  if (monModule) {
128  for(std::vector<SigBkg>::const_iterator iter =
129  monHistos.begin(); iter != monHistos.end(); ++iter) {
130 
131  for(unsigned int i = 0; i < 2; i++) {
132  Int_t oBin = iter->histo[i]->GetNbinsX() + 1;
133  iter->histo[i]->SetBinContent(0,
134  iter->histo[i]->GetBinContent(0) +
135  iter->underflow[i]);
136  iter->histo[i]->SetBinContent(oBin,
137  iter->histo[i]->GetBinContent(oBin) +
138  iter->overflow[i]);
139  iter->histo[i]->SetEntries(iter->entries[i]);
140  }
141  }
142 
143  monModule = 0;
144  }
145 }
146 
147 template<>
150  const char *name, const AtomicId *id, MVATrainer *trainer)
151 {
152  TrainProcessor *result = ProcessRegistry::create(name, id, trainer);
153  if (!result) {
154  // try to load the shared library and retry
155  try {
156  delete TrainProcessor::PluginFactory::get()->create(
157  std::string("TrainProcessor/") + name);
158  result = ProcessRegistry::create(name, id, trainer);
159  } catch(const cms::Exception &e) {
160  // caller will have to deal with the null pointer
161  // in principle this will just give a slightly more
162  // descriptive error message (and will rethrow anyhow)
163  }
164  }
165  return result;
166 }
167 
168 } // namespace PhysicsTools
int i
Definition: DBlmapReader.cc:9
Source * getSource() const
static Base_t * create(const char *name, const CalibBase_t *calib, Parent_t *parent)
create an instance of name, given a calibration calib and parent parent
#define min(a, b)
Definition: mlp_lapack.h:161
TrainProcessor(const char *name, const AtomicId *id, MVATrainer *trainer)
const SourceVariableSet & getInputs() const
Definition: Source.h:26
Cheap generic unique keyword identifier class.
Definition: AtomicId.h:32
const AtomicId getName() const
Definition: Variable.h:144
virtual void testData(const std::vector< double > *values, bool target, double weight, bool trainedOn)
tuple result
Definition: query.py:137
virtual void trainData(const std::vector< double > *values, bool target, double weight)
const double infinity
std::vector< SourceVariable * > get(bool withMagic=false) const
SourceVariableSet inputs
Definition: Source.h:39
#define EDM_REGISTER_PLUGINFACTORY(_factory_, _category_)
void book(const std::string &name, T *object)
TrainerMonitoring::Module * bookMonitor(const std::string &name)
Definition: MVATrainer.cc:710
AtomicId getName() const
Definition: Source.h:19
std::vector< SigBkg > monHistos
Generic registry template for polymorphic processor implementations.
SurfaceDeformation * create(int type, const std::vector< double > &params)
void doTrainData(const std::vector< double > *values, bool target, double weight, bool train, bool test)
T get(const Candidate &c)
Definition: component.h:56