11 #include <xercesc/dom/DOM.hpp> 37 ~ProcLikelihood()
override;
39 void configure(DOMElement *
elem)
override;
42 void trainBegin()
override;
43 void trainData(
const std::vector<double> *
values,
45 void trainEnd()
override;
51 std::vector<double> distr;
70 std::vector<SigBkg> pdfs;
71 std::vector<double> sigSum;
72 std::vector<double> bkgSum;
73 std::vector<double> bias;
88 ProcLikelihood::ProcLikelihood(
const char *
name,
const AtomicId *
id,
97 doCategoryBias(
false),
104 ProcLikelihood::~ProcLikelihood()
108 void ProcLikelihood::configure(DOMElement *
elem)
112 for(DOMNode *node = elem->getFirstChild();
113 node; node = node->getNextSibling()) {
114 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
117 DOMElement *elem =
static_cast<DOMElement*
>(node);
121 if (std::strcmp(nodeName,
"general") == 0) {
124 <<
"Config tag general needs to come " 125 "first." << std::endl;
129 XMLDocument::readAttribute<double>(
131 bias.push_back(globalBias);
136 doCategoryBias = XMLDocument::readAttribute<bool>(
137 elem,
"category_bias",
false);
138 doGlobalBias = XMLDocument::readAttribute<bool>(
139 elem,
"global_bias",
false);
140 logOutput = XMLDocument::readAttribute<bool>(
142 individual = XMLDocument::readAttribute<bool>(
143 elem,
"individual",
false);
144 neverUndefined = !XMLDocument::readAttribute<bool>(
145 elem,
"strict",
false);
146 keepEmpty = !XMLDocument::readAttribute<bool>(
147 elem,
"ignore_empty",
true);
154 if (std::strcmp(nodeName,
"bias_table") == 0) {
157 <<
"Bias can be only specified once." 160 for(DOMNode *subNode = node->getFirstChild();
161 subNode; subNode = subNode->getNextSibling()) {
162 if (subNode->getNodeType() !=
163 DOMNode::ELEMENT_NODE)
167 subNode->getNodeName()),
170 <<
"Expected bias tag in " 171 "config." << std::endl;
174 XMLDocument::readContent<double>(
181 if (std::strcmp(nodeName,
"category") != 0) {
186 if (categoryIdx >= 0)
188 <<
"More than one category variable given." 192 unsigned int count = XMLDocument::readAttribute<unsigned int>(
199 for(DOMNode *node = elem->getFirstChild();
200 node; node = node->getNextSibling()) {
201 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
205 if (std::strcmp(nodeName,
"general") == 0 ||
206 std::strcmp(nodeName,
"bias_table") == 0 ||
207 std::strcmp(nodeName,
"category") == 0)
210 if (std::strcmp(nodeName,
"sigbkg") != 0)
212 <<
"Expected sigbkg tag in config section." 214 elem =
static_cast<DOMElement*
>(node);
218 unsigned int size = XMLDocument::readAttribute<unsigned int>(
220 pdf.signal.distr.resize(size);
221 pdf.background.distr.resize(size);
223 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
228 pdf.signal.range.min =
229 XMLDocument::readAttribute<double>(
231 pdf.signal.range.max =
232 XMLDocument::readAttribute<double>(
234 pdf.background.range = pdf.signal.range;
235 pdf.iteration = ITER_FILL;
237 pdf.iteration = ITER_EMPTY;
243 unsigned int nInputs = getInputs().size();
244 if (categoryIdx >= 0)
250 if (!doGivenBias && !bias.empty()) {
254 <<
"Invalid number of category bias entries." 258 bias.push_back(bias.front());
263 <<
" pdf configs for " << nInputs
264 <<
" input variables." << std::endl;
273 std::vector<unsigned int> pdfMap;
275 for(
unsigned int j = i; j < pdfs.size(); j +=
nCategories)
278 double totalSig = std::accumulate(sigSum.begin(), sigSum.end(), 0.0);
279 double totalBkg = std::accumulate(bkgSum.begin(), bkgSum.end(), 0.0);
281 for(
unsigned int i = 0; i < pdfs.size(); i++) {
282 const SigBkg *iter = &pdfs[pdfMap[
i]];
287 double factor = std::accumulate(iter->signal.distr.begin(),
288 iter->signal.distr.end(), 0.0);
292 factor = 1.0 / factor;
293 std::vector<double>
values(iter->signal.distr.size() + 2);
295 iter->signal.distr.end(),
297 [&](
auto const&
c) {
return c * factor;});
302 iter->background.range.min,
303 iter->background.range.max);
304 factor = std::accumulate(iter->background.distr.begin(),
305 iter->background.distr.end(), 0.0);
309 factor = 1.0 / factor;
311 iter->background.distr.end(),
313 [&](
auto const&
c){
return c * factor;});
318 calib->
pdfs.push_back(pdf);
327 if (doGlobalBias || doCategoryBias || doGivenBias) {
329 double bias = doGlobalBias
330 ? totalSig / totalBkg
333 bias *= this->bias[
i];
335 bias *= (sigSum[
i] / totalSig) /
336 (bkgSum[i] / totalBkg);
337 calib->
bias.push_back(bias);
344 void ProcLikelihood::trainBegin()
348 void ProcLikelihood::trainData(
const std::vector<double> *
values,
352 if (categoryIdx >= 0)
353 category = (
int)values[categoryIdx].front();
365 for(std::vector<SigBkg>::iterator iter = pdfs.begin() +
category;
366 iter < pdfs.end(); iter +=
nCategories, values++) {
367 if (i++ == categoryIdx)
370 switch(iter->iteration) {
372 for(std::vector<double>::const_iterator
value =
375 iter->signal.range.min =
376 iter->signal.range.max = *
value;
377 iter->iteration = ITER_RANGE;
381 for(std::vector<double>::const_iterator
value =
384 iter->signal.range.min =
387 iter->signal.range.max =
398 PDF &pdf = target ? iter->signal : iter->background;
399 unsigned int n = pdf.distr.size() - 1;
400 double mult = 1.0 / pdf.range.width();
402 for(std::vector<double>::const_iterator
value =
403 values->begin();
value != values->end();
value++) {
404 double x = (*
value - pdf.range.min) * mult;
410 pdf.distr[(
unsigned int)(x * n + 0.5)] +=
weight;
415 void smoothArray(
unsigned int n,
double *values,
unsigned int nTimes)
417 for(
unsigned int iter = 0; iter < nTimes; iter++) {
418 double hold = n > 0 ? values[0] : 0.0;
419 for(
unsigned int i = 0; i <
n; i++) {
420 double delta = hold * 0.1;
423 values[i - 1] +=
delta;
427 hold = values[i + 1];
428 values[i + 1] +=
delta;
436 void ProcLikelihood::trainEnd()
442 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
443 iter != pdfs.end(); iter++) {
444 switch(iter->iteration) {
447 iter->background.range = iter->signal.range;
448 iter->iteration = ITER_FILL;
452 iter->signal.distr.front() *= 2;
453 iter->signal.distr.back() *= 2;
454 smoothArray(iter->signal.distr.size(),
455 &iter->signal.distr.front(),
458 iter->background.distr.front() *= 2;
459 iter->background.distr.back() *= 2;
460 smoothArray(iter->background.distr.size(),
461 &iter->background.distr.front(),
464 iter->iteration = ITER_DONE;
475 std::vector<SourceVariable*>
inputs = getInputs().get();
476 if (categoryIdx >= 0)
477 inputs.erase(inputs.begin() + categoryIdx);
479 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
480 iter != pdfs.end(); iter++) {
481 unsigned int idx = iter - pdfs.begin();
490 if (categoryIdx >= 0) {
491 name += Form(
"_CAT%d", catIdx);
492 title += Form(
" (cat. %d)", catIdx);
495 unsigned int n = iter->signal.distr.size() - 1;
496 double min = iter->signal.range.min -
497 0.5 * iter->signal.range.width() /
n;
498 double max = iter->signal.range.max +
499 0.5 * iter->signal.range.width() /
n;
501 (name +
"_sig").c_str(),
502 (title +
" signal").c_str(), n + 1,
min,
max);
503 for(
unsigned int i = 0; i <
n; i++)
504 histo->SetBinContent(
505 i + 1, iter->signal.distr[i]);
507 n = iter->background.distr.size() - 1;
508 min = iter->background.range.min -
509 0.5 * iter->background.range.width() /
n;
510 max = iter->background.range.max +
511 0.5 * iter->background.range.width() /
n;
513 (name +
"_bkg").c_str(),
514 (title +
" background").c_str(),
516 for(
unsigned int i = 0; i <
n; i++)
517 histo->SetBinContent(
518 i + 1, iter->background.distr[i]);
523 void xmlParsePDF(ProcLikelihood::PDF &pdf, DOMElement *elem)
526 std::strcmp(
XMLSimpleStr(elem->getNodeName()),
"pdf") != 0)
528 <<
"Expected pdf tag in sigbkg train data." 531 pdf.range.min = XMLDocument::readAttribute<double>(
elem,
"lower");
532 pdf.range.max = XMLDocument::readAttribute<double>(
elem,
"upper");
535 for(DOMNode *node = elem->getFirstChild();
536 node; node = node->getNextSibling()) {
537 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
543 <<
"Expected value tag in train file." 546 pdf.distr.push_back(XMLDocument::readContent<double>(node));
557 unsigned int category) :
558 source(source), name(name), category(category) {}
562 return source == other.source &&
563 name == other.name &&
564 category == other.category;
569 if (source < other.source)
571 if (!(source == other.source))
573 if (name < other.name)
575 if (!(name == other.name))
577 return category < other.category;
585 if (!exists(filename))
589 DOMElement *elem = xml.getRootNode();
591 "ProcLikelihood") != 0)
593 <<
"XML training data file has bad root node." 596 unsigned int version = XMLDocument::readAttribute<unsigned int>(
599 if (version < 1 || version > 2)
601 <<
"Unsupported version " << version
602 <<
"in train file." << std::endl;
605 for(node = elem->getFirstChild();
606 node; node = node->getNextSibling()) {
607 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
613 <<
"Expected categories tag in train file." 617 for(DOMNode *subNode = node->getFirstChild();
618 subNode; subNode = subNode->getNextSibling()) {
619 if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
622 if (i >= nCategories)
624 <<
"Too many categories in train " 625 "file." << std::endl;
630 <<
"Expected category tag in train " 631 "file." << std::endl;
633 elem =
static_cast<DOMElement*
>(subNode);
635 sigSum[
i] = XMLDocument::readAttribute<double>(
637 bkgSum[
i] = XMLDocument::readAttribute<double>(
643 <<
"Too few categories in train file." 649 std::map<Id, SigBkg*> pdfMap;
651 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
652 iter != pdfs.end(); ++iter) {
653 SigBkg *ptr = &*iter;
654 unsigned int idx = iter - pdfs.begin();
657 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
665 std::vector<SigBkg>::iterator cur = pdfs.begin();
667 for(node = node->getNextSibling();
668 node; node = node->getNextSibling()) {
669 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
675 <<
"Expected sigbkg tag in train file." 677 elem =
static_cast<DOMElement*
>(node);
679 SigBkg *pdf =
nullptr;
682 if (cur == pdfs.end())
684 <<
"Superfluous SigBkg in train data." 689 Id
id(XMLDocument::readAttribute<std::string>(
691 XMLDocument::readAttribute<std::string>(
693 XMLDocument::readAttribute<unsigned int>(
694 elem,
"category", 0));
695 std::map<Id, SigBkg*>::const_iterator
pos =
697 if (pos == pdfMap.end())
704 for(node = elem->getFirstChild();
705 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
706 node = node->getNextSibling());
707 DOMElement *elemSig =
708 node ?
static_cast<DOMElement*
>(node) :
nullptr;
710 for(node = node->getNextSibling();
711 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
712 node = node->getNextSibling());
713 while(node && node->getNodeType() != DOMNode::ELEMENT_NODE)
714 node = node->getNextSibling();
715 DOMElement *elemBkg =
716 node ?
static_cast<DOMElement*
>(node) :
nullptr;
718 for(node = node->getNextSibling();
719 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
720 node = node->getNextSibling());
723 <<
"Superfluous tags in sigbkg train data." 726 xmlParsePDF(pdf->signal, elemSig);
727 xmlParsePDF(pdf->background, elemBkg);
729 pdf->iteration = ITER_DONE;
734 if (version == 1 && cur != pdfs.end())
736 <<
"Missing SigBkg in train data." << std::endl;
740 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
741 iter != pdfs.end(); ++iter) {
742 if (iter->iteration != ITER_DONE) {
751 DOMElement *xmlStorePDF(DOMDocument *
doc,
752 const ProcLikelihood::PDF &pdf)
754 DOMElement *elem = doc->createElement(
XMLUniStr(
"pdf"));
759 for(std::vector<double>::const_iterator iter =
760 pdf.distr.begin(); iter != pdf.distr.end(); iter++) {
762 elem->appendChild(value);
764 XMLDocument::writeContent<double>(
value,
doc, *iter);
772 XMLDocument xml(trainer->trainFileName(
this,
"xml"),
true);
776 DOMElement *elem = doc->createElement(
XMLUniStr(
"categories"));
777 xml.getRootNode()->appendChild(elem);
779 DOMElement *category = doc->createElement(
XMLUniStr(
"category"));
780 elem->appendChild(category);
785 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
786 iter != pdfs.end(); iter++) {
787 elem = doc->createElement(
XMLUniStr(
"sigbkg"));
788 xml.getRootNode()->appendChild(elem);
790 unsigned int idx = iter - pdfs.begin();
793 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
800 if (categoryIdx >= 0)
803 elem->appendChild(xmlStorePDF(doc, iter->signal));
804 elem->appendChild(xmlStorePDF(doc, iter->background));
MVATrainerComputer * calib
static bool hasAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name)
bool operator==(const QGLikelihoodParameters &lhs, const QGLikelihoodCategory &rhs)
Test if parameters are compatible with category.
static void writeAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name, const T &value)
def elem(elemtype, innerHTML='', html_class='', kwargs)
XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument * createDocument(const std::string &root)
bool operator<(DTCELinkId const &lhs, DTCELinkId const &rhs)
static std::string const source