11 #include <xercesc/dom/DOM.hpp>
25 XERCES_CPP_NAMESPACE_USE
27 using namespace PhysicsTools;
37 virtual ~ProcLikelihood();
39 virtual void configure(DOMElement *
elem)
override;
42 virtual void trainBegin()
override;
43 virtual void trainData(
const std::vector<double> *
values,
45 virtual void trainEnd()
override;
47 virtual bool load()
override;
48 virtual void save()
override;
51 std::vector<double> distr;
70 std::vector<SigBkg> pdfs;
71 std::vector<double> sigSum;
72 std::vector<double> bkgSum;
73 std::vector<double> bias;
79 unsigned int nCategories;
88 ProcLikelihood::ProcLikelihood(
const char *
name,
const AtomicId *
id,
97 doCategoryBias(
false),
104 ProcLikelihood::~ProcLikelihood()
108 void ProcLikelihood::configure(DOMElement *
elem)
112 for(DOMNode *
node = elem->getFirstChild();
114 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
117 DOMElement *elem =
static_cast<DOMElement*
>(
node);
121 if (std::strcmp(nodeName,
"general") == 0) {
124 <<
"Config tag general needs to come "
125 "first." << std::endl;
129 XMLDocument::readAttribute<double>(
131 bias.push_back(globalBias);
136 doCategoryBias = XMLDocument::readAttribute<bool>(
137 elem,
"category_bias",
false);
138 doGlobalBias = XMLDocument::readAttribute<bool>(
139 elem,
"global_bias",
false);
140 logOutput = XMLDocument::readAttribute<bool>(
142 individual = XMLDocument::readAttribute<bool>(
143 elem,
"individual",
false);
144 neverUndefined = !XMLDocument::readAttribute<bool>(
145 elem,
"strict",
false);
146 keepEmpty = !XMLDocument::readAttribute<bool>(
147 elem,
"ignore_empty",
true);
154 if (std::strcmp(nodeName,
"bias_table") == 0) {
157 <<
"Bias can be only specified once."
160 for(DOMNode *subNode =
node->getFirstChild();
161 subNode; subNode = subNode->getNextSibling()) {
162 if (subNode->getNodeType() !=
163 DOMNode::ELEMENT_NODE)
167 subNode->getNodeName()),
170 <<
"Expected bias tag in "
171 "config." << std::endl;
174 XMLDocument::readContent<double>(
181 if (std::strcmp(nodeName,
"category") != 0) {
186 if (categoryIdx >= 0)
188 <<
"More than one category variable given."
192 unsigned int count = XMLDocument::readAttribute<unsigned int>(
199 for(DOMNode *
node = elem->getFirstChild();
201 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
205 if (std::strcmp(nodeName,
"general") == 0 ||
206 std::strcmp(nodeName,
"bias_table") == 0 ||
207 std::strcmp(nodeName,
"category") == 0)
210 if (std::strcmp(nodeName,
"sigbkg") != 0)
212 <<
"Expected sigbkg tag in config section."
214 elem =
static_cast<DOMElement*
>(
node);
218 unsigned int size = XMLDocument::readAttribute<unsigned int>(
220 pdf.signal.distr.resize(size);
221 pdf.background.distr.resize(size);
223 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
228 pdf.signal.range.min =
229 XMLDocument::readAttribute<double>(
231 pdf.signal.range.max =
232 XMLDocument::readAttribute<double>(
234 pdf.background.range = pdf.signal.range;
235 pdf.iteration = ITER_FILL;
237 pdf.iteration = ITER_EMPTY;
239 for(
unsigned int i = 0; i < nCategories; i++)
243 unsigned int nInputs = getInputs().size();
244 if (categoryIdx >= 0)
247 sigSum.resize(nCategories);
248 bkgSum.resize(nCategories);
250 if (!doGivenBias && !bias.empty()) {
252 if (bias.size() != nCategories)
254 <<
"Invalid number of category bias entries."
257 while (doGivenBias && bias.size() < nCategories)
258 bias.push_back(bias.front());
260 if (pdfs.size() != nInputs * nCategories)
262 <<
"Got " << (pdfs.size() / nCategories)
263 <<
" pdf configs for " << nInputs
264 <<
" input variables." << std::endl;
273 std::vector<unsigned int> pdfMap;
274 for(
unsigned int i = 0; i < nCategories; i++)
275 for(
unsigned int j = i;
j < pdfs.size();
j += nCategories)
278 double totalSig = std::accumulate(sigSum.begin(), sigSum.end(), 0.0);
279 double totalBkg = std::accumulate(bkgSum.begin(), bkgSum.end(), 0.0);
281 for(
unsigned int i = 0; i < pdfs.size(); i++) {
282 const SigBkg *
iter = &pdfs[pdfMap[
i]];
287 double factor = std::accumulate(iter->signal.distr.begin(),
288 iter->signal.distr.end(), 0.0);
292 factor = 1.0 / factor;
293 std::vector<double>
values(iter->signal.distr.size() + 2);
295 iter->signal.distr.end(),
297 std::bind1st(std::multiplies<double>(),
303 iter->background.range.min,
304 iter->background.range.max);
305 factor = std::accumulate(iter->background.distr.begin(),
306 iter->background.distr.end(), 0.0);
310 factor = 1.0 / factor;
312 iter->background.distr.end(),
314 std::bind1st(std::multiplies<double>(),
320 calib->
pdfs.push_back(pdf);
329 if (doGlobalBias || doCategoryBias || doGivenBias) {
330 for(
unsigned int i = 0; i < nCategories; i++) {
331 double bias = doGlobalBias
332 ? totalSig / totalBkg
335 bias *= this->bias[
i];
337 bias *= (sigSum[
i] / totalSig) /
338 (bkgSum[i] / totalBkg);
339 calib->
bias.push_back(bias);
346 void ProcLikelihood::trainBegin()
350 void ProcLikelihood::trainData(
const std::vector<double> *
values,
354 if (categoryIdx >= 0)
355 category = (int)values[categoryIdx].
front();
356 if (category < 0 || category >= (
int)nCategories)
367 for(std::vector<SigBkg>::iterator iter = pdfs.begin() +
category;
368 iter < pdfs.end(); iter += nCategories, values++) {
369 if (i++ == categoryIdx)
372 switch(iter->iteration) {
374 for(std::vector<double>::const_iterator
value =
377 iter->signal.range.min =
378 iter->signal.range.max = *
value;
379 iter->iteration = ITER_RANGE;
383 for(std::vector<double>::const_iterator
value =
386 iter->signal.range.min =
389 iter->signal.range.max =
400 PDF &pdf = target ? iter->signal : iter->background;
401 unsigned int n = pdf.distr.size() - 1;
402 double mult = 1.0 / pdf.range.width();
404 for(std::vector<double>::const_iterator
value =
405 values->begin();
value != values->end();
value++) {
406 double x = (*
value - pdf.range.min) * mult;
412 pdf.distr[(
unsigned int)(x * n + 0.5)] +=
weight;
417 static void smoothArray(
unsigned int n,
double *values,
unsigned int nTimes)
419 for(
unsigned int iter = 0; iter < nTimes; iter++) {
420 double hold = n > 0 ? values[0] : 0.0;
421 for(
unsigned int i = 0; i <
n; i++) {
422 double delta = hold * 0.1;
425 values[i - 1] +=
delta;
429 hold = values[i + 1];
430 values[i + 1] +=
delta;
438 void ProcLikelihood::trainEnd()
444 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
445 iter != pdfs.end(); iter++) {
446 switch(iter->iteration) {
449 iter->background.range = iter->signal.range;
450 iter->iteration = ITER_FILL;
454 iter->signal.distr.front() *= 2;
455 iter->signal.distr.back() *= 2;
456 smoothArray(iter->signal.distr.size(),
457 &iter->signal.distr.front(),
460 iter->background.distr.front() *= 2;
461 iter->background.distr.back() *= 2;
462 smoothArray(iter->background.distr.size(),
463 &iter->background.distr.front(),
466 iter->iteration = ITER_DONE;
476 if (done && monitoring) {
477 std::vector<SourceVariable*>
inputs = getInputs().get();
478 if (categoryIdx >= 0)
479 inputs.erase(inputs.begin() + categoryIdx);
481 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
482 iter != pdfs.end(); iter++) {
483 unsigned int idx = iter - pdfs.begin();
484 unsigned int catIdx = idx % nCategories;
485 unsigned int varIdx = idx / nCategories;
492 if (categoryIdx >= 0) {
493 name += Form(
"_CAT%d", catIdx);
494 title += Form(
" (cat. %d)", catIdx);
497 unsigned int n = iter->signal.distr.size() - 1;
498 double min = iter->signal.range.min -
499 0.5 * iter->signal.range.width() /
n;
500 double max = iter->signal.range.max +
501 0.5 * iter->signal.range.width() /
n;
502 TH1F *
histo = monitoring->book<TH1F>(name +
"_sig",
503 (name +
"_sig").c_str(),
504 (title +
" signal").c_str(), n + 1,
min,
max);
505 for(
unsigned int i = 0; i <
n; i++)
506 histo->SetBinContent(
507 i + 1, iter->signal.distr[i]);
509 n = iter->background.distr.size() - 1;
510 min = iter->background.range.min -
511 0.5 * iter->background.range.width() /
n;
512 max = iter->background.range.max +
513 0.5 * iter->background.range.width() /
n;
514 histo = monitoring->book<TH1F>(name +
"_bkg",
515 (name +
"_bkg").c_str(),
516 (title +
" background").c_str(),
518 for(
unsigned int i = 0; i <
n; i++)
519 histo->SetBinContent(
520 i + 1, iter->background.distr[i]);
525 static void xmlParsePDF(ProcLikelihood::PDF &pdf, DOMElement *elem)
528 std::strcmp(
XMLSimpleStr(elem->getNodeName()),
"pdf") != 0)
530 <<
"Expected pdf tag in sigbkg train data."
533 pdf.range.min = XMLDocument::readAttribute<double>(
elem,
"lower");
534 pdf.range.max = XMLDocument::readAttribute<double>(
elem,
"upper");
537 for(DOMNode *
node = elem->getFirstChild();
539 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
545 <<
"Expected value tag in train file."
548 pdf.distr.push_back(XMLDocument::readContent<double>(
node));
559 unsigned int category) :
560 source(source), name(name), category(category) {}
564 return source == other.source &&
565 name == other.name &&
566 category == other.category;
569 inline bool operator < (
const Id &other)
const
571 if (
source < other.source)
573 if (!(
source == other.source))
575 if (name < other.name)
577 if (!(name == other.name))
579 return category < other.category;
587 if (!exists(filename))
591 DOMElement *elem = xml.getRootNode();
593 "ProcLikelihood") != 0)
595 <<
"XML training data file has bad root node."
598 unsigned int version = XMLDocument::readAttribute<unsigned int>(
601 if (version < 1 || version > 2)
603 <<
"Unsupported version " << version
604 <<
"in train file." << std::endl;
607 for(node = elem->getFirstChild();
608 node; node = node->getNextSibling()) {
609 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
615 <<
"Expected categories tag in train file."
619 for(DOMNode *subNode = node->getFirstChild();
620 subNode; subNode = subNode->getNextSibling()) {
621 if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
624 if (i >= nCategories)
626 <<
"Too many categories in train "
627 "file." << std::endl;
632 <<
"Expected category tag in train "
633 "file." << std::endl;
635 elem =
static_cast<DOMElement*
>(subNode);
637 sigSum[
i] = XMLDocument::readAttribute<double>(
639 bkgSum[
i] = XMLDocument::readAttribute<double>(
645 <<
"Too few categories in train file."
651 std::map<Id, SigBkg*> pdfMap;
653 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
654 iter != pdfs.end(); ++
iter) {
655 SigBkg *ptr = &*
iter;
656 unsigned int idx = iter - pdfs.begin();
657 unsigned int catIdx = idx % nCategories;
658 unsigned int varIdx = idx / nCategories;
659 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
667 std::vector<SigBkg>::iterator cur = pdfs.begin();
669 for(node = node->getNextSibling();
670 node; node = node->getNextSibling()) {
671 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
677 <<
"Expected sigbkg tag in train file."
679 elem =
static_cast<DOMElement*
>(
node);
684 if (cur == pdfs.end())
686 <<
"Superfluous SigBkg in train data."
691 Id id(XMLDocument::readAttribute<std::string>(
693 XMLDocument::readAttribute<std::string>(
695 XMLDocument::readAttribute<unsigned int>(
696 elem,
"category", 0));
697 std::map<Id, SigBkg*>::const_iterator pos =
699 if (pos == pdfMap.end())
706 for(node = elem->getFirstChild();
707 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
708 node = node->getNextSibling());
709 DOMElement *elemSig =
710 node ?
static_cast<DOMElement*
>(
node) : 0;
712 for(node = node->getNextSibling();
713 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
714 node = node->getNextSibling());
715 while(node && node->getNodeType() != DOMNode::ELEMENT_NODE)
716 node = node->getNextSibling();
717 DOMElement *elemBkg =
718 node ?
static_cast<DOMElement*
>(
node) : 0;
720 for(node = node->getNextSibling();
721 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
722 node = node->getNextSibling());
725 <<
"Superfluous tags in sigbkg train data."
728 xmlParsePDF(pdf->signal, elemSig);
729 xmlParsePDF(pdf->background, elemBkg);
731 pdf->iteration = ITER_DONE;
736 if (version == 1 && cur != pdfs.end())
738 <<
"Missing SigBkg in train data." << std::endl;
742 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
743 iter != pdfs.end(); ++
iter) {
744 if (iter->iteration != ITER_DONE) {
753 static DOMElement *xmlStorePDF(DOMDocument *
doc,
754 const ProcLikelihood::PDF &pdf)
756 DOMElement *elem = doc->createElement(
XMLUniStr(
"pdf"));
761 for(std::vector<double>::const_iterator iter =
762 pdf.distr.begin(); iter != pdf.distr.end(); iter++) {
764 elem->appendChild(value);
774 XMLDocument xml(trainer->trainFileName(
this,
"xml"),
true);
778 DOMElement *elem = doc->createElement(
XMLUniStr(
"categories"));
779 xml.getRootNode()->appendChild(elem);
780 for(
unsigned int i = 0; i < nCategories; i++) {
781 DOMElement *category = doc->createElement(
XMLUniStr(
"category"));
782 elem->appendChild(category);
787 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
788 iter != pdfs.end(); iter++) {
789 elem = doc->createElement(
XMLUniStr(
"sigbkg"));
790 xml.getRootNode()->appendChild(elem);
792 unsigned int idx = iter - pdfs.begin();
793 unsigned int catIdx = idx % nCategories;
794 unsigned int varIdx = idx / nCategories;
795 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
802 if (categoryIdx >= 0)
805 elem->appendChild(xmlStorePDF(doc, iter->signal));
806 elem->appendChild(xmlStorePDF(doc, iter->background));
MVATrainerComputer * calib
bool operator<(const FedChannelConnection &, const FedChannelConnection &)
static bool hasAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name)
const T & max(const T &a, const T &b)
bool operator==(const QGLikelihoodParameters &lhs, const QGLikelihoodCategory &rhs)
Test if parameters are compatible with category.
static void writeAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name, const T &value)
tuple idx
DEBUGGING if hasattr(process,"trackMonIterativeTracking2012"): print "trackMonIterativeTracking2012 D...
XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument * createDocument(const std::string &root)
volatile std::atomic< bool > shutdown_flag false
static std::string const source
tuple size
Write out results.