11 #include <xercesc/dom/DOM.hpp>
25 XERCES_CPP_NAMESPACE_USE
27 using namespace PhysicsTools;
37 virtual ~ProcNormalize();
39 virtual void configure(DOMElement *
elem)
override;
42 virtual void trainBegin()
override;
43 virtual void trainData(
const std::vector<double> *
values,
45 virtual void trainEnd()
override;
47 virtual bool load()
override;
48 virtual void save()
override;
62 for(
unsigned int i = 0;
i < distr.size();
i++)
63 histo.setBinContent(
i + 1, distr[
i]);
68 std::vector<double> distr;
75 std::vector<PDF> pdfs;
77 unsigned int nCategories;
82 ProcNormalize::ProcNormalize(
const char *
name,
const AtomicId *
id,
90 ProcNormalize::~ProcNormalize()
94 void ProcNormalize::configure(DOMElement *
elem)
97 for(DOMNode *
node = elem->getFirstChild();
99 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
102 DOMElement *elem =
static_cast<DOMElement*
>(
node);
106 if (std::strcmp(nodeName,
"category") != 0) {
111 if (categoryIdx >= 0)
113 <<
"More than one category variable given."
117 unsigned int count = XMLDocument::readAttribute<unsigned int>(
124 for(DOMNode *
node = elem->getFirstChild();
126 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
130 if (std::strcmp(nodeName,
"category") == 0)
133 if (std::strcmp(nodeName,
"pdf") != 0)
135 <<
"Expected pdf tag in config section."
137 elem =
static_cast<DOMElement*
>(
node);
141 pdf.distr.resize(XMLDocument::readAttribute<unsigned int>(
144 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
148 XMLDocument::readAttribute<bool>(
elem,
"signal",
true);
150 XMLDocument::readAttribute<bool>(
elem,
"background",
true);
152 if (!pdf.fillSignal && !pdf.fillBackground)
154 <<
"Filling neither background nor signal "
155 "in config." << std::endl;
159 pdf.range.min = XMLDocument::readAttribute<double>(
161 pdf.range.max = XMLDocument::readAttribute<double>(
163 pdf.iteration = ITER_FILL;
165 pdf.iteration = ITER_EMPTY;
167 for(
unsigned int i = 0; i < nCategories; i++)
171 unsigned int nInputs = getInputs().size();
172 if (categoryIdx >= 0)
175 if (pdfs.size() != nInputs * nCategories)
177 <<
"Got " << pdfs.size()
178 <<
" pdf configs in total for " << nCategories
179 <<
" categories and " << nInputs
180 <<
" input varibles (" << (nInputs * nCategories) <<
" in total)." << std::endl;
187 std::vector<unsigned int> pdfMap;
188 for(
unsigned int i = 0; i < nCategories; i++)
189 for(
unsigned int j = i;
j < pdfs.size();
j += nCategories)
192 for(
unsigned int i = 0; i < pdfs.size(); i++)
193 calib->
distr.push_back(pdfs[pdfMap[i]]);
200 void ProcNormalize::trainBegin()
204 void ProcNormalize::trainData(
const std::vector<double> *
values,
208 if (categoryIdx >= 0)
209 category = (int)values[categoryIdx].
front();
210 if (category < 0 || category >= (
int)nCategories)
214 for(std::vector<PDF>::iterator iter = pdfs.begin() +
category;
215 iter < pdfs.end(); iter += nCategories, values++) {
216 if (i++ == categoryIdx)
219 switch(iter->iteration) {
221 for(std::vector<double>::const_iterator
value =
224 iter->range.min = iter->range.max = *
value;
225 iter->iteration = ITER_RANGE;
229 for(std::vector<double>::const_iterator
value =
232 iter->range.min =
std::min(iter->range.min,
234 iter->range.max =
std::max(iter->range.max,
244 if (!(target ? iter->fillSignal : iter->fillBackground))
247 unsigned int n = iter->distr.size() - 1;
248 double mult = 1.0 / iter->range.width();
250 for(std::vector<double>::const_iterator
value =
251 values->begin();
value != values->end();
value++) {
252 double x = (*
value - iter->range.min) * mult;
258 iter->distr[(
unsigned int)(x * n + 0.5)] +=
weight;
263 static void smoothArray(
unsigned int n,
double *values,
unsigned int nTimes)
265 for(
unsigned int iter = 0; iter < nTimes; iter++) {
266 double hold = n > 0 ? values[0] : 0.0;
267 for(
unsigned int i = 0; i <
n; i++) {
268 double delta = hold * 0.1;
271 values[i - 1] +=
delta;
275 hold = values[i + 1];
276 values[i + 1] +=
delta;
284 void ProcNormalize::trainEnd()
287 for(std::vector<PDF>::iterator iter = pdfs.begin();
288 iter != pdfs.end(); iter++) {
289 switch(iter->iteration) {
292 iter->iteration = ITER_FILL;
296 iter->distr.front() *= 2;
297 iter->distr.back() *= 2;
298 smoothArray(iter->distr.size(),
299 &iter->distr.front(),
302 iter->iteration = ITER_DONE;
312 if (done && monitoring) {
313 std::vector<SourceVariable*>
inputs = getInputs().get();
314 if (categoryIdx >= 0)
315 inputs.erase(inputs.begin() + categoryIdx);
317 for(std::vector<PDF>::iterator iter = pdfs.begin();
318 iter != pdfs.end(); iter++) {
319 unsigned int idx = iter - pdfs.begin();
320 unsigned int catIdx = idx % nCategories;
321 unsigned int varIdx = idx / nCategories;
328 if (categoryIdx >= 0) {
329 name += Form(
"_CAT%d", catIdx);
330 title += Form(
" (cat. %d)", catIdx);
333 unsigned int n = iter->distr.size() - 1;
334 double min = iter->range.min -
335 0.5 * iter->range.width() /
n;
336 double max = iter->range.max +
337 0.5 * iter->range.width() /
n;
338 TH1F *
histo = monitoring->book<TH1F>(name +
"_pdf",
339 name.c_str(), title.c_str(), n + 1,
min,
max);
340 for(
unsigned int i = 0; i <
n; i++)
341 histo->SetBinContent(i + 1, iter->distr[i]);
353 unsigned int category) :
354 source(source), name(name), category(category) {}
358 return source == other.source &&
359 name == other.name &&
360 category == other.category;
363 inline bool operator < (
const Id &other)
const
365 if (
source < other.source)
367 if (!(
source == other.source))
369 if (name < other.name)
371 if (!(name == other.name))
373 return category < other.category;
381 if (!exists(filename))
385 DOMElement *elem = xml.getRootNode();
387 "ProcNormalize") != 0)
389 <<
"XML training data file has bad root node."
392 unsigned int version = XMLDocument::readAttribute<unsigned int>(
395 if (version < 1 || version > 2)
397 <<
"Unsupported version " << version
398 <<
"in train file." << std::endl;
400 std::map<Id, PDF*> pdfMap;
402 for(std::vector<PDF>::iterator iter = pdfs.begin();
403 iter != pdfs.end(); ++iter) {
405 unsigned int idx = iter - pdfs.begin();
406 unsigned int catIdx = idx % nCategories;
407 unsigned int varIdx = idx / nCategories;
408 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
416 std::vector<PDF>::iterator cur = pdfs.begin();
418 for(DOMNode *
node = elem->getFirstChild();
420 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
425 <<
"Expected pdf tag in train file."
427 elem =
static_cast<DOMElement*
>(
node);
432 if (cur == pdfs.end())
434 <<
"Superfluous PDF in train data."
439 Id id(XMLDocument::readAttribute<std::string>(
441 XMLDocument::readAttribute<std::string>(
443 XMLDocument::readAttribute<unsigned int>(
444 elem,
"category", 0));
445 std::map<Id, PDF*>::const_iterator pos =
447 if (pos == pdfMap.end())
455 XMLDocument::readAttribute<double>(
elem,
"lower");
457 XMLDocument::readAttribute<double>(
elem,
"upper");
458 pdf->iteration = ITER_DONE;
461 for(DOMNode *subNode = elem->getFirstChild();
462 subNode; subNode = subNode->getNextSibling()) {
463 if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
469 <<
"Expected value tag in train file."
472 elem =
static_cast<DOMElement*
>(
node);
474 pdf->distr.push_back(
475 XMLDocument::readContent<double>(subNode));
479 if (version == 1 && cur != pdfs.end())
481 <<
"Missing PDF in train data." << std::endl;
484 for(std::vector<PDF>::const_iterator iter = pdfs.begin();
485 iter != pdfs.end(); ++iter) {
486 if (iter->iteration != ITER_DONE) {
497 XMLDocument xml(trainer->trainFileName(
this,
"xml"),
true);
501 for(std::vector<PDF>::const_iterator iter = pdfs.begin();
502 iter != pdfs.end(); iter++) {
503 DOMElement *elem = doc->createElement(
XMLUniStr(
"pdf"));
504 xml.getRootNode()->appendChild(elem);
506 unsigned int idx = iter - pdfs.begin();
507 unsigned int catIdx = idx % nCategories;
508 unsigned int varIdx = idx / nCategories;
509 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
516 if (categoryIdx >= 0)
522 for(std::vector<double>::const_iterator iter2 =
523 iter->distr.begin(); iter2 != iter->distr.end(); iter2++) {
526 elem->appendChild(value);
528 XMLDocument::writeContent<double>(
value, doc, *iter2);
T x() const
Cartesian x coordinate.
MVATrainerComputer * calib
bool operator<(const FedChannelConnection &, const FedChannelConnection &)
static bool hasAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name)
bool operator==(const QGLikelihoodParameters &lhs, const QGLikelihoodCategory &rhs)
Test if parameters are compatible with category.
static void writeAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name, const T &value)
tuple idx
DEBUGGING if hasattr(process,"trackMonIterativeTracking2012"): print "trackMonIterativeTracking2012 D...
XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument * createDocument(const std::string &root)
static std::string const source