11 #include <xercesc/dom/DOM.hpp>
25 XERCES_CPP_NAMESPACE_USE
27 using namespace PhysicsTools;
37 virtual ~ProcNormalize();
39 virtual void configure(DOMElement *
elem);
42 virtual void trainBegin();
43 virtual void trainData(
const std::vector<double> *
values,
45 virtual void trainEnd();
62 for(
unsigned int i = 0;
i < distr.size();
i++)
63 histo.setBinContent(
i + 1, distr[
i]);
68 std::vector<double> distr;
75 std::vector<PDF> pdfs;
77 unsigned int nCategories;
82 ProcNormalize::ProcNormalize(
const char *
name,
const AtomicId *
id,
90 ProcNormalize::~ProcNormalize()
94 void ProcNormalize::configure(DOMElement *
elem)
97 for(DOMNode *
node = elem->getFirstChild();
99 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
102 DOMElement *elem =
static_cast<DOMElement*
>(
node);
106 if (std::strcmp(nodeName,
"category") != 0) {
111 if (categoryIdx >= 0)
113 <<
"More than one category variable given."
117 unsigned int count = XMLDocument::readAttribute<unsigned int>(
124 for(DOMNode *
node = elem->getFirstChild();
126 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
130 if (std::strcmp(nodeName,
"category") == 0)
133 if (std::strcmp(nodeName,
"pdf") != 0)
135 <<
"Expected pdf tag in config section."
137 elem =
static_cast<DOMElement*
>(
node);
141 pdf.distr.resize(XMLDocument::readAttribute<unsigned int>(
144 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
148 XMLDocument::readAttribute<bool>(
elem,
"signal",
true);
150 XMLDocument::readAttribute<bool>(
elem,
"background",
true);
152 if (!pdf.fillSignal && !pdf.fillBackground)
154 <<
"Filling neither background nor signal "
155 "in config." << std::endl;
159 pdf.range.min = XMLDocument::readAttribute<double>(
161 pdf.range.max = XMLDocument::readAttribute<double>(
163 pdf.iteration = ITER_FILL;
165 pdf.iteration = ITER_EMPTY;
167 for(
unsigned int i = 0; i < nCategories; i++)
171 unsigned int nInputs = getInputs().size();
172 if (categoryIdx >= 0)
175 if (pdfs.size() != nInputs * nCategories)
177 <<
"Got " << (pdfs.size() / nCategories)
178 <<
" pdf configs for " << nInputs
179 <<
" input varibles." << std::endl;
186 std::vector<unsigned int> pdfMap;
187 for(
unsigned int i = 0; i < nCategories; i++)
188 for(
unsigned int j = i;
j < pdfs.size();
j += nCategories)
191 for(
unsigned int i = 0; i < pdfs.size(); i++)
192 calib->
distr.push_back(pdfs[pdfMap[i]]);
199 void ProcNormalize::trainBegin()
203 void ProcNormalize::trainData(
const std::vector<double> *
values,
207 if (categoryIdx >= 0)
208 category = (int)values[categoryIdx].
front();
209 if (category < 0 || category >= (
int)nCategories)
213 for(std::vector<PDF>::iterator iter = pdfs.begin() +
category;
214 iter < pdfs.end(); iter += nCategories, values++) {
215 if (i++ == categoryIdx)
218 switch(iter->iteration) {
220 for(std::vector<double>::const_iterator
value =
223 iter->range.min = iter->range.max = *
value;
224 iter->iteration = ITER_RANGE;
228 for(std::vector<double>::const_iterator
value =
231 iter->range.min =
std::min(iter->range.min,
233 iter->range.max =
std::max(iter->range.max,
243 if (!(target ? iter->fillSignal : iter->fillBackground))
246 unsigned int n = iter->distr.size() - 1;
247 double mult = 1.0 / iter->range.width();
249 for(std::vector<double>::const_iterator
value =
250 values->begin();
value != values->end();
value++) {
251 double x = (*
value - iter->range.min) * mult;
257 iter->distr[(
unsigned int)(x * n + 0.5)] +=
weight;
262 static void smoothArray(
unsigned int n,
double *values,
unsigned int nTimes)
264 for(
unsigned int iter = 0; iter < nTimes; iter++) {
265 double hold = n > 0 ? values[0] : 0.0;
266 for(
unsigned int i = 0; i <
n; i++) {
267 double delta = hold * 0.1;
270 values[i - 1] +=
delta;
274 hold = values[i + 1];
275 values[i + 1] +=
delta;
283 void ProcNormalize::trainEnd()
286 for(std::vector<PDF>::iterator iter = pdfs.begin();
287 iter != pdfs.end(); iter++) {
288 switch(iter->iteration) {
291 iter->iteration = ITER_FILL;
295 iter->distr.front() *= 2;
296 iter->distr.back() *= 2;
297 smoothArray(iter->distr.size(),
298 &iter->distr.front(),
301 iter->iteration = ITER_DONE;
311 if (done && monitoring) {
312 std::vector<SourceVariable*>
inputs = getInputs().get();
313 if (categoryIdx >= 0)
314 inputs.erase(inputs.begin() + categoryIdx);
316 for(std::vector<PDF>::iterator iter = pdfs.begin();
317 iter != pdfs.end(); iter++) {
318 unsigned int idx = iter - pdfs.begin();
319 unsigned int catIdx = idx % nCategories;
320 unsigned int varIdx = idx / nCategories;
327 if (categoryIdx >= 0) {
328 name += Form(
"_CAT%d", catIdx);
329 title += Form(
" (cat. %d)", catIdx);
332 unsigned int n = iter->distr.size() - 1;
333 double min = iter->range.min -
334 0.5 * iter->range.width() /
n;
335 double max = iter->range.max +
336 0.5 * iter->range.width() /
n;
337 TH1F *
histo = monitoring->book<TH1F>(name +
"_pdf",
338 name.c_str(), title.c_str(), n + 1,
min,
max);
339 for(
unsigned int i = 0; i <
n; i++)
340 histo->SetBinContent(i + 1, iter->distr[i]);
352 unsigned int category) :
353 source(source), name(name), category(category) {}
357 return source == other.source &&
358 name == other.name &&
359 category == other.category;
362 inline bool operator < (
const Id &other)
const
364 if (
source < other.source)
366 if (!(
source == other.source))
368 if (name < other.name)
370 if (!(name == other.name))
372 return category < other.category;
379 std::string
filename = trainer->trainFileName(
this,
"xml");
380 if (!exists(filename))
384 DOMElement *elem = xml.getRootNode();
386 "ProcNormalize") != 0)
388 <<
"XML training data file has bad root node."
391 unsigned int version = XMLDocument::readAttribute<unsigned int>(
394 if (version < 1 || version > 2)
396 <<
"Unsupported version " << version
397 <<
"in train file." << std::endl;
399 std::map<Id, PDF*> pdfMap;
401 for(std::vector<PDF>::iterator iter = pdfs.begin();
402 iter != pdfs.end(); ++iter) {
404 unsigned int idx = iter - pdfs.begin();
405 unsigned int catIdx = idx % nCategories;
406 unsigned int varIdx = idx / nCategories;
407 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
415 std::vector<PDF>::iterator cur = pdfs.begin();
417 for(DOMNode *
node = elem->getFirstChild();
419 if (
node->getNodeType() != DOMNode::ELEMENT_NODE)
424 <<
"Expected pdf tag in train file."
426 elem =
static_cast<DOMElement*
>(
node);
431 if (cur == pdfs.end())
433 <<
"Superfluous PDF in train data."
438 Id
id(XMLDocument::readAttribute<std::string>(
440 XMLDocument::readAttribute<std::string>(
442 XMLDocument::readAttribute<unsigned int>(
443 elem,
"category", 0));
444 std::map<Id, PDF*>::const_iterator
pos =
446 if (pos == pdfMap.end())
454 XMLDocument::readAttribute<double>(
elem,
"lower");
456 XMLDocument::readAttribute<double>(
elem,
"upper");
457 pdf->iteration = ITER_DONE;
460 for(DOMNode *subNode = elem->getFirstChild();
461 subNode; subNode = subNode->getNextSibling()) {
462 if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
468 <<
"Expected value tag in train file."
471 elem =
static_cast<DOMElement*
>(
node);
473 pdf->distr.push_back(
474 XMLDocument::readContent<double>(subNode));
478 if (version == 1 && cur != pdfs.end())
480 <<
"Missing PDF in train data." << std::endl;
483 for(std::vector<PDF>::const_iterator iter = pdfs.begin();
484 iter != pdfs.end(); ++iter) {
485 if (iter->iteration != ITER_DONE) {
494 void ProcNormalize::save()
496 XMLDocument xml(trainer->trainFileName(
this,
"xml"),
true);
497 DOMDocument *
doc = xml.createDocument(
"ProcNormalize");
500 for(std::vector<PDF>::const_iterator iter = pdfs.begin();
501 iter != pdfs.end(); iter++) {
502 DOMElement *elem = doc->createElement(
XMLUniStr(
"pdf"));
503 xml.getRootNode()->appendChild(elem);
505 unsigned int idx = iter - pdfs.begin();
506 unsigned int catIdx = idx % nCategories;
507 unsigned int varIdx = idx / nCategories;
508 if (categoryIdx >= 0 && (
int)varIdx >= categoryIdx)
515 if (categoryIdx >= 0)
521 for(std::vector<double>::const_iterator iter2 =
522 iter->distr.begin(); iter2 != iter->distr.end(); iter2++) {
525 elem->appendChild(value);
527 XMLDocument::writeContent<double>(
value,
doc, *iter2);
bool operator==(const CaloTower &t1, const CaloTower &t2)
detail::ThreadSafeRegistry< ParameterSetID, ParameterSet, ProcessParameterSetIDCache > Registry
MVATrainerComputer * calib
bool operator<(const FedChannelConnection &, const FedChannelConnection &)
static bool hasAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name)
const T & max(const T &a, const T &b)
static void writeAttribute(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, const char *name, const T &value)