00001 #include <algorithm>
00002 #include <iostream>
00003 #include <numeric>
00004 #include <iomanip>
00005 #include <cstring>
00006 #include <vector>
00007 #include <string>
00008 #include <memory>
00009 #include <map>
00010
00011 #include <xercesc/dom/DOM.hpp>
00012
00013 #include <TH1.h>
00014
00015 #include "FWCore/Utilities/interface/Exception.h"
00016
00017 #include "PhysicsTools/MVAComputer/interface/AtomicId.h"
00018
00019 #include "PhysicsTools/MVATrainer/interface/XMLSimpleStr.h"
00020 #include "PhysicsTools/MVATrainer/interface/XMLUniStr.h"
00021 #include "PhysicsTools/MVATrainer/interface/XMLDocument.h"
00022 #include "PhysicsTools/MVATrainer/interface/MVATrainer.h"
00023 #include "PhysicsTools/MVATrainer/interface/TrainProcessor.h"
00024
00025 XERCES_CPP_NAMESPACE_USE
00026
00027 using namespace PhysicsTools;
00028
00029 namespace {
00030
00031 class ProcLikelihood : public TrainProcessor {
00032 public:
00033 typedef TrainProcessor::Registry<ProcLikelihood>::Type Registry;
00034
00035 ProcLikelihood(const char *name, const AtomicId *id,
00036 MVATrainer *trainer);
00037 virtual ~ProcLikelihood();
00038
00039 virtual Variable::Flags getDefaultFlags() const
00040 { return Variable::FLAG_ALL; }
00041
00042 virtual void configure(DOMElement *elem);
00043 virtual Calibration::VarProcessor *getCalibration() const;
00044
00045 virtual void trainBegin();
00046 virtual void trainData(const std::vector<double> *values,
00047 bool target, double weight);
00048 virtual void trainEnd();
00049
00050 virtual bool load();
00051 virtual void save();
00052
00053 struct PDF {
00054 std::vector<double> distr;
00055 Calibration::HistogramD::Range range;
00056 };
00057
00058 private:
00059 enum Iteration {
00060 ITER_EMPTY,
00061 ITER_RANGE,
00062 ITER_FILL,
00063 ITER_DONE
00064 };
00065
00066 struct SigBkg {
00067 PDF signal;
00068 PDF background;
00069 unsigned int smooth;
00070 Iteration iteration;
00071 };
00072
00073 std::vector<SigBkg> pdfs;
00074 std::vector<double> sigSum;
00075 std::vector<double> bkgSum;
00076 std::vector<double> bias;
00077 int categoryIdx;
00078 bool logOutput;
00079 bool individual;
00080 bool neverUndefined;
00081 bool keepEmpty;
00082 unsigned int nCategories;
00083 bool doCategoryBias;
00084 bool doGivenBias;
00085 bool doGlobalBias;
00086 Iteration iteration;
00087 };
00088
00089 static ProcLikelihood::Registry registry("ProcLikelihood");
00090
00091 ProcLikelihood::ProcLikelihood(const char *name, const AtomicId *id,
00092 MVATrainer *trainer) :
00093 TrainProcessor(name, id, trainer),
00094 categoryIdx(-1),
00095 logOutput(false),
00096 individual(false),
00097 neverUndefined(true),
00098 keepEmpty(false),
00099 nCategories(1),
00100 doCategoryBias(false),
00101 doGivenBias(false),
00102 doGlobalBias(false),
00103 iteration(ITER_FILL)
00104 {
00105 }
00106
00107 ProcLikelihood::~ProcLikelihood()
00108 {
00109 }
00110
00111 void ProcLikelihood::configure(DOMElement *elem)
00112 {
00113 int i = 0;
00114 bool first = true;
00115 for(DOMNode *node = elem->getFirstChild();
00116 node; node = node->getNextSibling()) {
00117 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00118 continue;
00119
00120 DOMElement *elem = static_cast<DOMElement*>(node);
00121
00122 XMLSimpleStr nodeName(node->getNodeName());
00123
00124 if (std::strcmp(nodeName, "general") == 0) {
00125 if (!first)
00126 throw cms::Exception("ProcLikelihood")
00127 << "Config tag general needs to come "
00128 "first." << std::endl;
00129
00130 if (XMLDocument::hasAttribute(elem, "bias")) {
00131 double globalBias =
00132 XMLDocument::readAttribute<double>(
00133 elem, "bias");
00134 bias.push_back(globalBias);
00135 doGivenBias = true;
00136 } else
00137 doGivenBias = false;
00138
00139 doCategoryBias = XMLDocument::readAttribute<bool>(
00140 elem, "category_bias", false);
00141 doGlobalBias = XMLDocument::readAttribute<bool>(
00142 elem, "global_bias", false);
00143 logOutput = XMLDocument::readAttribute<bool>(
00144 elem, "log", false);
00145 individual = XMLDocument::readAttribute<bool>(
00146 elem, "individual", false);
00147 neverUndefined = !XMLDocument::readAttribute<bool>(
00148 elem, "strict", false);
00149 keepEmpty = !XMLDocument::readAttribute<bool>(
00150 elem, "ignore_empty", true);
00151
00152 first = false;
00153 continue;
00154 }
00155 first = false;
00156
00157 if (std::strcmp(nodeName, "bias_table") == 0) {
00158 if (!bias.empty())
00159 throw cms::Exception("ProcLikelihood")
00160 << "Bias can be only specified once."
00161 << std::endl;
00162
00163 for(DOMNode *subNode = node->getFirstChild();
00164 subNode; subNode = subNode->getNextSibling()) {
00165 if (subNode->getNodeType() !=
00166 DOMNode::ELEMENT_NODE)
00167 continue;
00168
00169 if (std::strcmp(XMLSimpleStr(
00170 subNode->getNodeName()),
00171 "bias") != 0)
00172 throw cms::Exception("ProcLikelihood")
00173 << "Expected bias tag in "
00174 "config." << std::endl;
00175
00176 bias.push_back(
00177 XMLDocument::readContent<double>(
00178 subNode));
00179 }
00180
00181 continue;
00182 }
00183
00184 if (std::strcmp(nodeName, "category") != 0) {
00185 i++;
00186 continue;
00187 }
00188
00189 if (categoryIdx >= 0)
00190 throw cms::Exception("ProcLikelihood")
00191 << "More than one category variable given."
00192 << std::endl;
00193
00194
00195 unsigned int count = XMLDocument::readAttribute<unsigned int>(
00196 elem, "count");
00197
00198 categoryIdx = i;
00199 nCategories = count;
00200 }
00201
00202 for(DOMNode *node = elem->getFirstChild();
00203 node; node = node->getNextSibling()) {
00204 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00205 continue;
00206
00207 XMLSimpleStr nodeName(node->getNodeName());
00208 if (std::strcmp(nodeName, "general") == 0 ||
00209 std::strcmp(nodeName, "bias_table") == 0 ||
00210 std::strcmp(nodeName, "category") == 0)
00211 continue;
00212
00213 if (std::strcmp(nodeName, "sigbkg") != 0)
00214 throw cms::Exception("ProcLikelihood")
00215 << "Expected sigbkg tag in config section."
00216 << std::endl;
00217 elem = static_cast<DOMElement*>(node);
00218
00219 SigBkg pdf;
00220
00221 unsigned int size = XMLDocument::readAttribute<unsigned int>(
00222 elem, "size", 50);
00223 pdf.signal.distr.resize(size);
00224 pdf.background.distr.resize(size);
00225
00226 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
00227 elem, "smooth", 0);
00228
00229 if (XMLDocument::hasAttribute(elem, "lower") &&
00230 XMLDocument::hasAttribute(elem, "upper")) {
00231 pdf.signal.range.min =
00232 XMLDocument::readAttribute<double>(
00233 elem, "lower");
00234 pdf.signal.range.max =
00235 XMLDocument::readAttribute<double>(
00236 elem, "upper");
00237 pdf.background.range = pdf.signal.range;
00238 pdf.iteration = ITER_FILL;
00239 } else
00240 pdf.iteration = ITER_EMPTY;
00241
00242 for(unsigned int i = 0; i < nCategories; i++)
00243 pdfs.push_back(pdf);
00244 }
00245
00246 unsigned int nInputs = getInputs().size();
00247 if (categoryIdx >= 0)
00248 nInputs--;
00249
00250 sigSum.resize(nCategories);
00251 bkgSum.resize(nCategories);
00252
00253 if (!doGivenBias && !bias.empty()) {
00254 doGivenBias = true;
00255 if (bias.size() != nCategories)
00256 throw cms::Exception("ProcLikelihood")
00257 << "Invalid number of category bias entries."
00258 << std::endl;
00259 }
00260 while (doGivenBias && bias.size() < nCategories)
00261 bias.push_back(bias.front());
00262
00263 if (pdfs.size() != nInputs * nCategories)
00264 throw cms::Exception("ProcLikelihood")
00265 << "Got " << (pdfs.size() / nCategories)
00266 << " pdf configs for " << nInputs
00267 << " input variables." << std::endl;
00268 }
00269
00270 Calibration::VarProcessor *ProcLikelihood::getCalibration() const
00271 {
00272 typedef Calibration::ProcLikelihood Calib;
00273
00274 Calibration::ProcLikelihood *calib = new Calibration::ProcLikelihood;
00275
00276 std::vector<unsigned int> pdfMap;
00277 for(unsigned int i = 0; i < nCategories; i++)
00278 for(unsigned int j = i; j < pdfs.size(); j += nCategories)
00279 pdfMap.push_back(j);
00280
00281 double totalSig = std::accumulate(sigSum.begin(), sigSum.end(), 0.0);
00282 double totalBkg = std::accumulate(bkgSum.begin(), bkgSum.end(), 0.0);
00283
00284 for(unsigned int i = 0; i < pdfs.size(); i++) {
00285 const SigBkg *iter = &pdfs[pdfMap[i]];
00286 Calibration::ProcLikelihood::SigBkg pdf;
00287
00288 pdf.signal = Calibration::HistogramF(iter->signal.distr.size(),
00289 iter->signal.range);
00290 double factor = std::accumulate(iter->signal.distr.begin(),
00291 iter->signal.distr.end(), 0.0);
00292 if (factor < 1e-20)
00293 factor = 1.0;
00294 else
00295 factor = 1.0 / factor;
00296 std::vector<double> values(iter->signal.distr.size() + 2);
00297 std::transform(iter->signal.distr.begin(),
00298 iter->signal.distr.end(),
00299 values.begin() + 1,
00300 std::bind1st(std::multiplies<double>(),
00301 factor));
00302 pdf.signal.setValues(values);
00303
00304 pdf.background =
00305 Calibration::HistogramF(iter->background.distr.size(),
00306 iter->background.range.min,
00307 iter->background.range.max);
00308 factor = std::accumulate(iter->background.distr.begin(),
00309 iter->background.distr.end(), 0.0);
00310 if (factor < 1e-20)
00311 factor = 1.0;
00312 else
00313 factor = 1.0 / factor;
00314 std::transform(iter->background.distr.begin(),
00315 iter->background.distr.end(),
00316 values.begin() + 1,
00317 std::bind1st(std::multiplies<double>(),
00318 factor));
00319 pdf.background.setValues(values);
00320
00321 pdf.useSplines = true;
00322
00323 calib->pdfs.push_back(pdf);
00324 }
00325
00326 calib->categoryIdx = categoryIdx &
00327 ~(~((1U << (Calib::kCategoryMax + 2)) - 1) >> 1);
00328 calib->categoryIdx |= (logOutput ? 1 : 0) << Calib::kLogOutput;
00329 calib->categoryIdx |= (individual ? 1 : 0) << Calib::kIndividual;
00330 calib->categoryIdx |= (neverUndefined ? 1 : 0) << Calib::kNeverUndefined;
00331 calib->categoryIdx |= (keepEmpty ? 1 : 0) << Calib::kKeepEmpty;
00332
00333 if (doGlobalBias || doCategoryBias || doGivenBias) {
00334 for(unsigned int i = 0; i < nCategories; i++) {
00335 double bias = doGlobalBias
00336 ? totalSig / totalBkg
00337 : 1.0;
00338 if (doGivenBias)
00339 bias *= this->bias[i];
00340 if (doCategoryBias)
00341 bias *= (sigSum[i] / totalSig) /
00342 (bkgSum[i] / totalBkg);
00343 calib->bias.push_back(bias);
00344 }
00345 }
00346
00347 return calib;
00348 }
00349
00350 void ProcLikelihood::trainBegin()
00351 {
00352 }
00353
00354 void ProcLikelihood::trainData(const std::vector<double> *values,
00355 bool target, double weight)
00356 {
00357 int category = 0;
00358 if (categoryIdx >= 0)
00359 category = (int)values[categoryIdx].front();
00360 if (category < 0 || category >= (int)nCategories)
00361 return;
00362
00363 if (iteration == ITER_FILL) {
00364 if (target)
00365 sigSum[category] += weight;
00366 else
00367 bkgSum[category] += weight;
00368 }
00369
00370 int i = 0;
00371 for(std::vector<SigBkg>::iterator iter = pdfs.begin() + category;
00372 iter < pdfs.end(); iter += nCategories, values++) {
00373 if (i++ == categoryIdx)
00374 values++;
00375
00376 switch(iter->iteration) {
00377 case ITER_EMPTY:
00378 for(std::vector<double>::const_iterator value =
00379 values->begin();
00380 value != values->end(); value++) {
00381 iter->signal.range.min =
00382 iter->signal.range.max = *value;
00383 iter->iteration = ITER_RANGE;
00384 break;
00385 }
00386 case ITER_RANGE:
00387 for(std::vector<double>::const_iterator value =
00388 values->begin();
00389 value != values->end(); value++) {
00390 iter->signal.range.min =
00391 std::min(iter->signal.range.min,
00392 *value);
00393 iter->signal.range.max =
00394 std::max(iter->signal.range.max,
00395 *value);
00396 }
00397 continue;
00398 case ITER_FILL:
00399 break;
00400 default:
00401 continue;
00402 }
00403
00404 PDF &pdf = target ? iter->signal : iter->background;
00405 unsigned int n = pdf.distr.size() - 1;
00406 double mult = 1.0 / pdf.range.width();
00407
00408 for(std::vector<double>::const_iterator value =
00409 values->begin(); value != values->end(); value++) {
00410 double x = (*value - pdf.range.min) * mult;
00411 if (x < 0.0)
00412 x = 0.0;
00413 else if (x >= 1.0)
00414 x = 1.0;
00415
00416 pdf.distr[(unsigned int)(x * n + 0.5)] += weight;
00417 }
00418 }
00419 }
00420
00421 static void smoothArray(unsigned int n, double *values, unsigned int nTimes)
00422 {
00423 for(unsigned int iter = 0; iter < nTimes; iter++) {
00424 double hold = n > 0 ? values[0] : 0.0;
00425 for(unsigned int i = 0; i < n; i++) {
00426 double delta = hold * 0.1;
00427 double rem = 0.0;
00428 if (i > 0) {
00429 values[i - 1] += delta;
00430 rem -= delta;
00431 }
00432 if (i < n - 1) {
00433 hold = values[i + 1];
00434 values[i + 1] += delta;
00435 rem -= delta;
00436 }
00437 values[i] += rem;
00438 }
00439 }
00440 }
00441
00442 void ProcLikelihood::trainEnd()
00443 {
00444 bool done = true;
00445 if (iteration == ITER_FILL)
00446 iteration = ITER_DONE;
00447
00448 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00449 iter != pdfs.end(); iter++) {
00450 switch(iter->iteration) {
00451 case ITER_EMPTY:
00452 case ITER_RANGE:
00453 iter->background.range = iter->signal.range;
00454 iter->iteration = ITER_FILL;
00455 done = false;
00456 break;
00457 case ITER_FILL:
00458 iter->signal.distr.front() *= 2;
00459 iter->signal.distr.back() *= 2;
00460 smoothArray(iter->signal.distr.size(),
00461 &iter->signal.distr.front(),
00462 iter->smooth);
00463
00464 iter->background.distr.front() *= 2;
00465 iter->background.distr.back() *= 2;
00466 smoothArray(iter->background.distr.size(),
00467 &iter->background.distr.front(),
00468 iter->smooth);
00469
00470 iter->iteration = ITER_DONE;
00471 break;
00472 default:
00473 ;
00474 }
00475 }
00476
00477 if (done)
00478 trained = true;
00479
00480 if (done && monitoring) {
00481 std::vector<SourceVariable*> inputs = getInputs().get();
00482 if (categoryIdx >= 0)
00483 inputs.erase(inputs.begin() + categoryIdx);
00484
00485 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00486 iter != pdfs.end(); iter++) {
00487 unsigned int idx = iter - pdfs.begin();
00488 unsigned int catIdx = idx % nCategories;
00489 unsigned int varIdx = idx / nCategories;
00490 SourceVariable *var = inputs[varIdx];
00491 std::string name =
00492 (const char*)var->getSource()->getName()
00493 + std::string("_")
00494 + (const char*)var->getName();
00495 std::string title = name;
00496 if (categoryIdx >= 0) {
00497 name += Form("_CAT%d", catIdx);
00498 title += Form(" (cat. %d)", catIdx);
00499 }
00500
00501 unsigned int n = iter->signal.distr.size() - 1;
00502 double min = iter->signal.range.min -
00503 0.5 * iter->signal.range.width() / n;
00504 double max = iter->signal.range.max +
00505 0.5 * iter->signal.range.width() / n;
00506 TH1F *histo = monitoring->book<TH1F>(name + "_sig",
00507 (name + "_sig").c_str(),
00508 (title + " signal").c_str(), n + 1, min, max);
00509 for(unsigned int i = 0; i < n; i++)
00510 histo->SetBinContent(
00511 i + 1, iter->signal.distr[i]);
00512
00513 n = iter->background.distr.size() - 1;
00514 min = iter->background.range.min -
00515 0.5 * iter->background.range.width() / n;
00516 max = iter->background.range.max +
00517 0.5 * iter->background.range.width() / n;
00518 histo = monitoring->book<TH1F>(name + "_bkg",
00519 (name + "_bkg").c_str(),
00520 (title + " background").c_str(),
00521 n + 1, min, max);
00522 for(unsigned int i = 0; i < n; i++)
00523 histo->SetBinContent(
00524 i + 1, iter->background.distr[i]);
00525 }
00526 }
00527 }
00528
00529 static void xmlParsePDF(ProcLikelihood::PDF &pdf, DOMElement *elem)
00530 {
00531 if (!elem ||
00532 std::strcmp(XMLSimpleStr(elem->getNodeName()), "pdf") != 0)
00533 throw cms::Exception("ProcLikelihood")
00534 << "Expected pdf tag in sigbkg train data."
00535 << std::endl;
00536
00537 pdf.range.min = XMLDocument::readAttribute<double>(elem, "lower");
00538 pdf.range.max = XMLDocument::readAttribute<double>(elem, "upper");
00539
00540 pdf.distr.clear();
00541 for(DOMNode *node = elem->getFirstChild();
00542 node; node = node->getNextSibling()) {
00543 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00544 continue;
00545
00546 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00547 "value") != 0)
00548 throw cms::Exception("ProcLikelihood")
00549 << "Expected value tag in train file."
00550 << std::endl;
00551
00552 pdf.distr.push_back(XMLDocument::readContent<double>(node));
00553 }
00554 }
00555
00556 namespace {
00557 struct Id {
00558 AtomicId source;
00559 AtomicId name;
00560 unsigned int category;
00561
00562 inline Id(AtomicId source, AtomicId name,
00563 unsigned int category) :
00564 source(source), name(name), category(category) {}
00565
00566 inline bool operator == (const Id &other) const
00567 {
00568 return source == other.source &&
00569 name == other.name &&
00570 category == other.category;
00571 }
00572
00573 inline bool operator < (const Id &other) const
00574 {
00575 if (source < other.source)
00576 return true;
00577 if (!(source == other.source))
00578 return false;
00579 if (name < other.name)
00580 return true;
00581 if (!(name == other.name))
00582 return false;
00583 return category < other.category;
00584 }
00585 };
00586 }
00587
00588 bool ProcLikelihood::load()
00589 {
00590 std::string filename = trainer->trainFileName(this, "xml");
00591 if (!exists(filename))
00592 return false;
00593
00594 XMLDocument xml(filename);
00595 DOMElement *elem = xml.getRootNode();
00596 if (std::strcmp(XMLSimpleStr(elem->getNodeName()),
00597 "ProcLikelihood") != 0)
00598 throw cms::Exception("ProcLikelihood")
00599 << "XML training data file has bad root node."
00600 << std::endl;
00601
00602 unsigned int version = XMLDocument::readAttribute<unsigned int>(
00603 elem, "version", 1);
00604
00605 if (version < 1 || version > 2)
00606 throw cms::Exception("ProcLikelihood")
00607 << "Unsupported version " << version
00608 << "in train file." << std::endl;
00609
00610 DOMNode *node;
00611 for(node = elem->getFirstChild();
00612 node; node = node->getNextSibling()) {
00613 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00614 continue;
00615
00616 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00617 "categories") != 0)
00618 throw cms::Exception("ProcLikelihood")
00619 << "Expected categories tag in train file."
00620 << std::endl;
00621
00622 unsigned int i = 0;
00623 for(DOMNode *subNode = node->getFirstChild();
00624 subNode; subNode = subNode->getNextSibling()) {
00625 if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
00626 continue;
00627
00628 if (i >= nCategories)
00629 throw cms::Exception("ProcLikelihood")
00630 << "Too many categories in train "
00631 "file." << std::endl;
00632
00633 if (std::strcmp(XMLSimpleStr(subNode->getNodeName()),
00634 "category") != 0)
00635 throw cms::Exception("ProcLikelihood")
00636 << "Expected category tag in train "
00637 "file." << std::endl;
00638
00639 elem = static_cast<DOMElement*>(subNode);
00640
00641 sigSum[i] = XMLDocument::readAttribute<double>(
00642 elem, "signal");
00643 bkgSum[i] = XMLDocument::readAttribute<double>(
00644 elem, "background");
00645 i++;
00646 }
00647 if (i < nCategories)
00648 throw cms::Exception("ProcLikelihood")
00649 << "Too few categories in train file."
00650 << std::endl;
00651
00652 break;
00653 }
00654
00655 std::map<Id, SigBkg*> pdfMap;
00656
00657 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00658 iter != pdfs.end(); ++iter) {
00659 SigBkg *ptr = &*iter;
00660 unsigned int idx = iter - pdfs.begin();
00661 unsigned int catIdx = idx % nCategories;
00662 unsigned int varIdx = idx / nCategories;
00663 if (categoryIdx >= 0 && (int)varIdx >= categoryIdx)
00664 varIdx++;
00665 const SourceVariable *var = getInputs().get()[varIdx];
00666 Id id(var->getSource()->getName(), var->getName(), catIdx);
00667
00668 pdfMap[id] = ptr;
00669 }
00670
00671 std::vector<SigBkg>::iterator cur = pdfs.begin();
00672
00673 for(node = node->getNextSibling();
00674 node; node = node->getNextSibling()) {
00675 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00676 continue;
00677
00678 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00679 "sigbkg") != 0)
00680 throw cms::Exception("ProcLikelihood")
00681 << "Expected sigbkg tag in train file."
00682 << std::endl;
00683 elem = static_cast<DOMElement*>(node);
00684
00685 SigBkg *pdf = 0;
00686 switch(version) {
00687 case 1:
00688 if (cur == pdfs.end())
00689 throw cms::Exception("ProcLikelihood")
00690 << "Superfluous SigBkg in train data."
00691 << std::endl;
00692 pdf = &*cur++;
00693 break;
00694 case 2: {
00695 Id id(XMLDocument::readAttribute<std::string>(
00696 elem, "source"),
00697 XMLDocument::readAttribute<std::string>(
00698 elem, "name"),
00699 XMLDocument::readAttribute<unsigned int>(
00700 elem, "category", 0));
00701 std::map<Id, SigBkg*>::const_iterator pos =
00702 pdfMap.find(id);
00703 if (pos == pdfMap.end())
00704 continue;
00705 else
00706 pdf = pos->second;
00707 } break;
00708 }
00709
00710 for(node = elem->getFirstChild();
00711 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00712 node = node->getNextSibling());
00713 DOMElement *elemSig =
00714 node ? static_cast<DOMElement*>(node) : 0;
00715
00716 for(node = node->getNextSibling();
00717 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00718 node = node->getNextSibling());
00719 while(node && node->getNodeType() != DOMNode::ELEMENT_NODE)
00720 node = node->getNextSibling();
00721 DOMElement *elemBkg =
00722 node ? static_cast<DOMElement*>(node) : 0;
00723
00724 for(node = node->getNextSibling();
00725 node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00726 node = node->getNextSibling());
00727 if (node)
00728 throw cms::Exception("ProcLikelihood")
00729 << "Superfluous tags in sigbkg train data."
00730 << std::endl;
00731
00732 xmlParsePDF(pdf->signal, elemSig);
00733 xmlParsePDF(pdf->background, elemBkg);
00734
00735 pdf->iteration = ITER_DONE;
00736
00737 node = elem;
00738 }
00739
00740 if (version == 1 && cur != pdfs.end())
00741 throw cms::Exception("ProcLikelihood")
00742 << "Missing SigBkg in train data." << std::endl;
00743
00744 iteration = ITER_DONE;
00745 trained = true;
00746 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
00747 iter != pdfs.end(); ++iter) {
00748 if (iter->iteration != ITER_DONE) {
00749 trained = false;
00750 break;
00751 }
00752 }
00753
00754 return true;
00755 }
00756
00757 static DOMElement *xmlStorePDF(DOMDocument *doc,
00758 const ProcLikelihood::PDF &pdf)
00759 {
00760 DOMElement *elem = doc->createElement(XMLUniStr("pdf"));
00761
00762 XMLDocument::writeAttribute(elem, "lower", pdf.range.min);
00763 XMLDocument::writeAttribute(elem, "upper", pdf.range.max);
00764
00765 for(std::vector<double>::const_iterator iter =
00766 pdf.distr.begin(); iter != pdf.distr.end(); iter++) {
00767 DOMElement *value = doc->createElement(XMLUniStr("value"));
00768 elem->appendChild(value);
00769
00770 XMLDocument::writeContent<double>(value, doc, *iter);
00771 }
00772
00773 return elem;
00774 }
00775
00776 void ProcLikelihood::save()
00777 {
00778 XMLDocument xml(trainer->trainFileName(this, "xml"), true);
00779 DOMDocument *doc = xml.createDocument("ProcLikelihood");
00780 XMLDocument::writeAttribute(doc->getDocumentElement(), "version", 2);
00781
00782 DOMElement *elem = doc->createElement(XMLUniStr("categories"));
00783 xml.getRootNode()->appendChild(elem);
00784 for(unsigned int i = 0; i < nCategories; i++) {
00785 DOMElement *category = doc->createElement(XMLUniStr("category"));
00786 elem->appendChild(category);
00787 XMLDocument::writeAttribute(category, "signal", sigSum[i]);
00788 XMLDocument::writeAttribute(category, "background", bkgSum[i]);
00789 }
00790
00791 for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
00792 iter != pdfs.end(); iter++) {
00793 elem = doc->createElement(XMLUniStr("sigbkg"));
00794 xml.getRootNode()->appendChild(elem);
00795
00796 unsigned int idx = iter - pdfs.begin();
00797 unsigned int catIdx = idx % nCategories;
00798 unsigned int varIdx = idx / nCategories;
00799 if (categoryIdx >= 0 && (int)varIdx >= categoryIdx)
00800 varIdx++;
00801 const SourceVariable *var = getInputs().get()[varIdx];
00802 XMLDocument::writeAttribute(elem, "source",
00803 (const char*)var->getSource()->getName());
00804 XMLDocument::writeAttribute(elem, "name",
00805 (const char*)var->getName());
00806 if (categoryIdx >= 0)
00807 XMLDocument::writeAttribute(elem, "category", catIdx);
00808
00809 elem->appendChild(xmlStorePDF(doc, iter->signal));
00810 elem->appendChild(xmlStorePDF(doc, iter->background));
00811 }
00812 }
00813
00814 }