CMS 3D CMS Logo

ProcLikelihood.cc

Go to the documentation of this file.
00001 #include <algorithm>
00002 #include <iostream>
00003 #include <numeric>
00004 #include <iomanip>
00005 #include <cstring>
00006 #include <vector>
00007 #include <string>
00008 #include <memory>
00009 #include <map>
00010 
00011 #include <xercesc/dom/DOM.hpp>
00012 
00013 #include <TH1.h>
00014 
00015 #include "FWCore/Utilities/interface/Exception.h"
00016 
00017 #include "PhysicsTools/MVAComputer/interface/AtomicId.h"
00018 
00019 #include "PhysicsTools/MVATrainer/interface/XMLSimpleStr.h"
00020 #include "PhysicsTools/MVATrainer/interface/XMLUniStr.h"
00021 #include "PhysicsTools/MVATrainer/interface/XMLDocument.h"
00022 #include "PhysicsTools/MVATrainer/interface/MVATrainer.h"
00023 #include "PhysicsTools/MVATrainer/interface/TrainProcessor.h"
00024 
00025 XERCES_CPP_NAMESPACE_USE
00026 
00027 using namespace PhysicsTools;
00028 
00029 namespace { // anonymous
00030 
00031 class ProcLikelihood : public TrainProcessor {
00032     public:
00033         typedef TrainProcessor::Registry<ProcLikelihood>::Type Registry;
00034 
00035         ProcLikelihood(const char *name, const AtomicId *id,
00036                        MVATrainer *trainer);
00037         virtual ~ProcLikelihood();
00038 
00039         virtual Variable::Flags getDefaultFlags() const
00040         { return Variable::FLAG_ALL; }
00041 
00042         virtual void configure(DOMElement *elem);
00043         virtual Calibration::VarProcessor *getCalibration() const;
00044 
00045         virtual void trainBegin();
00046         virtual void trainData(const std::vector<double> *values,
00047                                bool target, double weight);
00048         virtual void trainEnd();
00049 
00050         virtual bool load();
00051         virtual void save();
00052 
00053         struct PDF {
00054                 std::vector<double>             distr;
00055                 Calibration::HistogramD::Range  range;
00056         };
00057 
00058     private:
00059         enum Iteration {
00060                 ITER_EMPTY,
00061                 ITER_RANGE,
00062                 ITER_FILL,
00063                 ITER_DONE
00064         };
00065 
00066         struct SigBkg {
00067                 PDF             signal;
00068                 PDF             background;
00069                 unsigned int    smooth;
00070                 Iteration       iteration;
00071         };
00072 
00073         std::vector<SigBkg>     pdfs;
00074         std::vector<double>     sigSum;
00075         std::vector<double>     bkgSum;
00076         std::vector<double>     bias;
00077         int                     categoryIdx;
00078         bool                    logOutput;
00079         bool                    individual;
00080         bool                    neverUndefined;
00081         bool                    keepEmpty;
00082         unsigned int            nCategories;
00083         bool                    doCategoryBias;
00084         bool                    doGivenBias;
00085         bool                    doGlobalBias;
00086         Iteration               iteration;
00087 };
00088 
00089 static ProcLikelihood::Registry registry("ProcLikelihood");
00090 
00091 ProcLikelihood::ProcLikelihood(const char *name, const AtomicId *id,
00092                                MVATrainer *trainer) :
00093         TrainProcessor(name, id, trainer),
00094         categoryIdx(-1),
00095         logOutput(false),
00096         individual(false),
00097         neverUndefined(true),
00098         keepEmpty(false),
00099         nCategories(1),
00100         doCategoryBias(false),
00101         doGivenBias(false),
00102         doGlobalBias(false),
00103         iteration(ITER_FILL)
00104 {
00105 }
00106 
00107 ProcLikelihood::~ProcLikelihood()
00108 {
00109 }
00110 
00111 void ProcLikelihood::configure(DOMElement *elem)
00112 {
00113         int i = 0;
00114         bool first = true;
00115         for(DOMNode *node = elem->getFirstChild();
00116             node; node = node->getNextSibling()) {
00117                 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00118                         continue;
00119 
00120                 DOMElement *elem = static_cast<DOMElement*>(node);
00121 
00122                 XMLSimpleStr nodeName(node->getNodeName());
00123 
00124                 if (std::strcmp(nodeName, "general") == 0) {
00125                         if (!first)
00126                                 throw cms::Exception("ProcLikelihood")
00127                                         << "Config tag general needs to come "
00128                                            "first." << std::endl;
00129 
00130                         if (XMLDocument::hasAttribute(elem, "bias")) {
00131                                 double globalBias =
00132                                         XMLDocument::readAttribute<double>(
00133                                                                 elem, "bias");
00134                                 bias.push_back(globalBias);
00135                                 doGivenBias = true;
00136                         } else
00137                                 doGivenBias = false;
00138 
00139                         doCategoryBias = XMLDocument::readAttribute<bool>(
00140                                                 elem, "category_bias", false);
00141                         doGlobalBias = XMLDocument::readAttribute<bool>(
00142                                                 elem, "global_bias", false);
00143                         logOutput = XMLDocument::readAttribute<bool>(
00144                                                 elem, "log", false);
00145                         individual = XMLDocument::readAttribute<bool>(
00146                                                 elem, "individual", false);
00147                         neverUndefined = !XMLDocument::readAttribute<bool>(
00148                                                 elem, "strict", false);
00149                         keepEmpty = !XMLDocument::readAttribute<bool>(
00150                                                 elem, "ignore_empty", true);
00151 
00152                         first = false;
00153                         continue;
00154                 }
00155                 first = false;
00156 
00157                 if (std::strcmp(nodeName, "bias_table") == 0) {
00158                         if (!bias.empty())
00159                                 throw cms::Exception("ProcLikelihood")
00160                                         << "Bias can be only specified once."
00161                                         << std::endl;
00162 
00163                         for(DOMNode *subNode = node->getFirstChild();
00164                             subNode; subNode = subNode->getNextSibling()) {
00165                                 if (subNode->getNodeType() !=
00166                                     DOMNode::ELEMENT_NODE)
00167                                         continue;
00168 
00169                                 if (std::strcmp(XMLSimpleStr(
00170                                                 subNode->getNodeName()),
00171                                                 "bias") != 0)
00172                                         throw cms::Exception("ProcLikelihood")
00173                                                 << "Expected bias tag in "
00174                                                    "config." << std::endl;
00175 
00176                                 bias.push_back(
00177                                         XMLDocument::readContent<double>(
00178                                                                 subNode));
00179                         }
00180 
00181                         continue;
00182                 }
00183 
00184                 if (std::strcmp(nodeName, "category") != 0) {
00185                         i++;
00186                         continue;
00187                 }
00188 
00189                 if (categoryIdx >= 0)
00190                         throw cms::Exception("ProcLikelihood")
00191                                 << "More than one category variable given."
00192                                 << std::endl;
00193 
00194 
00195                 unsigned int count = XMLDocument::readAttribute<unsigned int>(
00196                                                                 elem, "count");
00197 
00198                 categoryIdx = i;
00199                 nCategories = count;
00200         }
00201 
00202         for(DOMNode *node = elem->getFirstChild();
00203             node; node = node->getNextSibling()) {
00204                 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00205                         continue;
00206 
00207                 XMLSimpleStr nodeName(node->getNodeName());
00208                 if (std::strcmp(nodeName, "general") == 0 ||
00209                     std::strcmp(nodeName, "bias_table") == 0 ||
00210                     std::strcmp(nodeName, "category") == 0)
00211                         continue;
00212 
00213                 if (std::strcmp(nodeName, "sigbkg") != 0)
00214                         throw cms::Exception("ProcLikelihood")
00215                                 << "Expected sigbkg tag in config section."
00216                                 << std::endl;
00217                 elem = static_cast<DOMElement*>(node);
00218 
00219                 SigBkg pdf;
00220 
00221                 unsigned int size = XMLDocument::readAttribute<unsigned int>(
00222                                                         elem, "size", 50);
00223                 pdf.signal.distr.resize(size);
00224                 pdf.background.distr.resize(size);
00225 
00226                 pdf.smooth = XMLDocument::readAttribute<unsigned int>(
00227                                                         elem, "smooth", 0);
00228 
00229                 if (XMLDocument::hasAttribute(elem, "lower") &&
00230                     XMLDocument::hasAttribute(elem, "upper")) {
00231                         pdf.signal.range.min =
00232                                 XMLDocument::readAttribute<double>(
00233                                                                 elem, "lower");
00234                         pdf.signal.range.max =
00235                                 XMLDocument::readAttribute<double>(
00236                                                                 elem, "upper");
00237                         pdf.background.range = pdf.signal.range;
00238                         pdf.iteration = ITER_FILL;
00239                 } else
00240                         pdf.iteration = ITER_EMPTY;
00241 
00242                 for(unsigned int i = 0; i < nCategories; i++)
00243                         pdfs.push_back(pdf);
00244         }
00245 
00246         unsigned int nInputs = getInputs().size();
00247         if (categoryIdx >= 0)
00248                 nInputs--;
00249 
00250         sigSum.resize(nCategories);
00251         bkgSum.resize(nCategories);
00252 
00253         if (!doGivenBias && !bias.empty()) {
00254                 doGivenBias = true;
00255                 if (bias.size() != nCategories)
00256                         throw cms::Exception("ProcLikelihood")
00257                                 << "Invalid number of category bias entries."
00258                                 << std::endl;
00259         }
00260         while (doGivenBias && bias.size() < nCategories)
00261                 bias.push_back(bias.front());
00262 
00263         if (pdfs.size() != nInputs * nCategories)
00264                 throw cms::Exception("ProcLikelihood")
00265                         << "Got " << (pdfs.size() / nCategories)
00266                         << " pdf configs for " << nInputs
00267                         << " input variables." << std::endl;
00268 }
00269 
00270 Calibration::VarProcessor *ProcLikelihood::getCalibration() const
00271 {
00272         typedef Calibration::ProcLikelihood Calib;
00273 
00274         Calibration::ProcLikelihood *calib = new Calibration::ProcLikelihood;
00275 
00276         std::vector<unsigned int> pdfMap;
00277         for(unsigned int i = 0; i < nCategories; i++)
00278                 for(unsigned int j = i; j < pdfs.size(); j += nCategories)
00279                         pdfMap.push_back(j);
00280 
00281         double totalSig = std::accumulate(sigSum.begin(), sigSum.end(), 0.0);
00282         double totalBkg = std::accumulate(bkgSum.begin(), bkgSum.end(), 0.0);
00283 
00284         for(unsigned int i = 0; i < pdfs.size(); i++) {
00285                 const SigBkg *iter = &pdfs[pdfMap[i]];
00286                 Calibration::ProcLikelihood::SigBkg pdf;
00287 
00288                 pdf.signal = Calibration::HistogramF(iter->signal.distr.size(),
00289                                                      iter->signal.range);
00290                 double factor = std::accumulate(iter->signal.distr.begin(),
00291                                                 iter->signal.distr.end(), 0.0);
00292                 if (factor < 1e-20)
00293                         factor = 1.0;
00294                 else
00295                         factor = 1.0 / factor;
00296                 std::vector<double> values(iter->signal.distr.size() + 2);
00297                 std::transform(iter->signal.distr.begin(),
00298                                iter->signal.distr.end(),
00299                                values.begin() + 1,
00300                                std::bind1st(std::multiplies<double>(),
00301                                             factor));
00302                 pdf.signal.setValues(values);
00303 
00304                 pdf.background =
00305                         Calibration::HistogramF(iter->background.distr.size(),
00306                                                 iter->background.range.min,
00307                                                 iter->background.range.max);
00308                 factor = std::accumulate(iter->background.distr.begin(),
00309                                          iter->background.distr.end(), 0.0);
00310                 if (factor < 1e-20)
00311                         factor = 1.0;
00312                 else
00313                         factor = 1.0 / factor;
00314                 std::transform(iter->background.distr.begin(),
00315                                iter->background.distr.end(),
00316                                values.begin() + 1,
00317                                std::bind1st(std::multiplies<double>(),
00318                                             factor));
00319                 pdf.background.setValues(values);
00320 
00321                 pdf.useSplines = true;
00322 
00323                 calib->pdfs.push_back(pdf);
00324         }
00325 
00326         calib->categoryIdx = categoryIdx &
00327                         ~(~((1U << (Calib::kCategoryMax + 2)) - 1) >> 1);
00328         calib->categoryIdx |= (logOutput ? 1 : 0) << Calib::kLogOutput;
00329         calib->categoryIdx |= (individual ? 1 : 0) << Calib::kIndividual;
00330         calib->categoryIdx |= (neverUndefined ? 1 : 0) << Calib::kNeverUndefined;
00331         calib->categoryIdx |= (keepEmpty ? 1 : 0) << Calib::kKeepEmpty;
00332 
00333         if (doGlobalBias || doCategoryBias || doGivenBias) {
00334                 for(unsigned int i = 0; i < nCategories; i++) {
00335                         double bias = doGlobalBias
00336                                                 ? totalSig / totalBkg
00337                                                 : 1.0;
00338                         if (doGivenBias)
00339                                 bias *= this->bias[i];
00340                         if (doCategoryBias)
00341                                 bias *= (sigSum[i] / totalSig) /
00342                                         (bkgSum[i] / totalBkg);
00343                         calib->bias.push_back(bias);
00344                 }
00345         }
00346 
00347         return calib;
00348 }
00349 
00350 void ProcLikelihood::trainBegin()
00351 {
00352 }
00353 
00354 void ProcLikelihood::trainData(const std::vector<double> *values,
00355                                bool target, double weight)
00356 {
00357         int category = 0;
00358         if (categoryIdx >= 0)
00359                 category = (int)values[categoryIdx].front();
00360         if (category < 0 || category >= (int)nCategories)
00361                 return;
00362 
00363         if (iteration == ITER_FILL) {
00364                 if (target)
00365                         sigSum[category] += weight;
00366                 else
00367                         bkgSum[category] += weight;
00368         }
00369 
00370         int i = 0;
00371         for(std::vector<SigBkg>::iterator iter = pdfs.begin() + category;
00372             iter < pdfs.end(); iter += nCategories, values++) {
00373                 if (i++ == categoryIdx)
00374                         values++;
00375 
00376                 switch(iter->iteration) {
00377                     case ITER_EMPTY:
00378                         for(std::vector<double>::const_iterator value =
00379                                                         values->begin();
00380                                 value != values->end(); value++) {
00381                                 iter->signal.range.min =
00382                                         iter->signal.range.max = *value;
00383                                 iter->iteration = ITER_RANGE;
00384                                 break;
00385                         }
00386                     case ITER_RANGE:
00387                         for(std::vector<double>::const_iterator value =
00388                                                         values->begin();
00389                                 value != values->end(); value++) {
00390                                 iter->signal.range.min =
00391                                         std::min(iter->signal.range.min,
00392                                                  *value);
00393                                 iter->signal.range.max =
00394                                         std::max(iter->signal.range.max,
00395                                                  *value);
00396                         }
00397                         continue;
00398                     case ITER_FILL:
00399                         break;
00400                     default:
00401                         continue;
00402                 }
00403 
00404                 PDF &pdf = target ? iter->signal : iter->background;
00405                 unsigned int n = pdf.distr.size() - 1;
00406                 double mult = 1.0 / pdf.range.width();
00407  
00408                 for(std::vector<double>::const_iterator value =
00409                         values->begin(); value != values->end(); value++) {
00410                         double x = (*value - pdf.range.min) * mult;
00411                         if (x < 0.0)
00412                                 x = 0.0;
00413                         else if (x >= 1.0)
00414                                 x = 1.0;
00415 
00416                         pdf.distr[(unsigned int)(x * n + 0.5)] += weight;
00417                 }
00418         }
00419 }
00420 
00421 static void smoothArray(unsigned int n, double *values, unsigned int nTimes)
00422 {
00423         for(unsigned int iter = 0; iter < nTimes; iter++) {
00424                 double hold = n > 0 ? values[0] : 0.0;
00425                 for(unsigned int i = 0; i < n; i++) {
00426                         double delta = hold * 0.1;
00427                         double rem = 0.0;
00428                         if (i > 0) {
00429                                 values[i - 1] += delta;
00430                                 rem -= delta;
00431                         }
00432                         if (i < n - 1) {
00433                                 hold = values[i + 1];
00434                                 values[i + 1] += delta;
00435                                 rem -= delta;
00436                         }
00437                         values[i] += rem;
00438                 }
00439         }
00440 }
00441 
00442 void ProcLikelihood::trainEnd()
00443 {
00444         bool done = true;
00445         if (iteration == ITER_FILL)
00446                 iteration = ITER_DONE;
00447 
00448         for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00449             iter != pdfs.end(); iter++) {
00450                 switch(iter->iteration) {
00451                     case ITER_EMPTY:
00452                     case ITER_RANGE:
00453                         iter->background.range = iter->signal.range;
00454                         iter->iteration = ITER_FILL;
00455                         done = false;
00456                         break;
00457                     case ITER_FILL:
00458                         iter->signal.distr.front() *= 2;
00459                         iter->signal.distr.back() *= 2;
00460                         smoothArray(iter->signal.distr.size(),
00461                                     &iter->signal.distr.front(),
00462                                     iter->smooth);
00463 
00464                         iter->background.distr.front() *= 2;
00465                         iter->background.distr.back() *= 2;
00466                         smoothArray(iter->background.distr.size(),
00467                                     &iter->background.distr.front(),
00468                                     iter->smooth);
00469 
00470                         iter->iteration = ITER_DONE;
00471                         break;
00472                     default:
00473                         /* shut up */;
00474                 }
00475         }
00476 
00477         if (done)
00478                 trained = true;
00479 
00480         if (done && monitoring) {
00481                 std::vector<SourceVariable*> inputs = getInputs().get();
00482                 if (categoryIdx >= 0)
00483                         inputs.erase(inputs.begin() + categoryIdx);
00484 
00485                 for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00486                     iter != pdfs.end(); iter++) {
00487                         unsigned int idx = iter - pdfs.begin();
00488                         unsigned int catIdx = idx % nCategories;
00489                         unsigned int varIdx = idx / nCategories;
00490                         SourceVariable *var = inputs[varIdx];
00491                         std::string name =
00492                                 (const char*)var->getSource()->getName()
00493                                 + std::string("_")
00494                                 + (const char*)var->getName();
00495                         std::string title = name;
00496                         if (categoryIdx >= 0) {
00497                                 name += Form("_CAT%d", catIdx);
00498                                 title += Form(" (cat. %d)", catIdx);
00499                         }
00500 
00501                         unsigned int n = iter->signal.distr.size() - 1;
00502                         double min = iter->signal.range.min -
00503                                      0.5 * iter->signal.range.width() / n;
00504                         double max = iter->signal.range.max +
00505                                      0.5 * iter->signal.range.width() / n;
00506                         TH1F *histo = monitoring->book<TH1F>(name + "_sig",
00507                                 (name + "_sig").c_str(),
00508                                 (title + " signal").c_str(), n + 1, min, max);
00509                         for(unsigned int i = 0; i < n; i++)
00510                                 histo->SetBinContent(
00511                                         i + 1, iter->signal.distr[i]);
00512 
00513                         n = iter->background.distr.size() - 1;
00514                         min = iter->background.range.min -
00515                               0.5 * iter->background.range.width() / n;
00516                         max = iter->background.range.max +
00517                               0.5 * iter->background.range.width() / n;
00518                         histo = monitoring->book<TH1F>(name + "_bkg",
00519                                 (name + "_bkg").c_str(),
00520                                 (title + " background").c_str(),
00521                                 n + 1, min, max);
00522                         for(unsigned int i = 0; i < n; i++)
00523                                 histo->SetBinContent(
00524                                         i + 1, iter->background.distr[i]);
00525                 }
00526         }
00527 }
00528 
00529 static void xmlParsePDF(ProcLikelihood::PDF &pdf, DOMElement *elem)
00530 {
00531         if (!elem ||
00532             std::strcmp(XMLSimpleStr(elem->getNodeName()), "pdf") != 0)
00533                 throw cms::Exception("ProcLikelihood")
00534                         << "Expected pdf tag in sigbkg train data."
00535                         << std::endl;
00536 
00537         pdf.range.min = XMLDocument::readAttribute<double>(elem, "lower");
00538         pdf.range.max = XMLDocument::readAttribute<double>(elem, "upper");
00539 
00540         pdf.distr.clear();
00541         for(DOMNode *node = elem->getFirstChild();
00542             node; node = node->getNextSibling()) {
00543                 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00544                         continue;
00545 
00546                 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00547                                              "value") != 0)
00548                         throw cms::Exception("ProcLikelihood")
00549                                 << "Expected value tag in train file."
00550                                 << std::endl;
00551 
00552                 pdf.distr.push_back(XMLDocument::readContent<double>(node));
00553         }
00554 }
00555 
00556 namespace {
00557         struct Id {
00558                 AtomicId        source;
00559                 AtomicId        name;
00560                 unsigned int    category;
00561 
00562                 inline Id(AtomicId source, AtomicId name,
00563                           unsigned int category) :
00564                         source(source), name(name), category(category) {}
00565 
00566                 inline bool operator == (const Id &other) const
00567                 {
00568                         return source == other.source &&
00569                                name == other.name &&
00570                                category == other.category;
00571                 }
00572 
00573                 inline bool operator < (const Id &other) const
00574                 {
00575                         if (source < other.source)
00576                                 return true;
00577                         if (!(source == other.source))
00578                                 return false;
00579                         if (name < other.name)
00580                                 return true;
00581                         if (!(name == other.name))
00582                                 return false;
00583                         return category < other.category;
00584                 }
00585         };
00586 }
00587 
00588 bool ProcLikelihood::load()
00589 {
00590         std::string filename = trainer->trainFileName(this, "xml");
00591         if (!exists(filename))
00592                 return false;
00593 
00594         XMLDocument xml(filename);
00595         DOMElement *elem = xml.getRootNode();
00596         if (std::strcmp(XMLSimpleStr(elem->getNodeName()),
00597                                      "ProcLikelihood") != 0)
00598                 throw cms::Exception("ProcLikelihood")
00599                         << "XML training data file has bad root node."
00600                         << std::endl;
00601 
00602         unsigned int version = XMLDocument::readAttribute<unsigned int>(
00603                                                         elem, "version", 1);
00604 
00605         if (version < 1 || version > 2)
00606                 throw cms::Exception("ProcLikelihood")
00607                         << "Unsupported version " << version
00608                         << "in train file." << std::endl;
00609 
00610         DOMNode *node;
00611         for(node = elem->getFirstChild();
00612             node; node = node->getNextSibling()) {
00613                 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00614                         continue;
00615 
00616                 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00617                                 "categories") != 0)
00618                         throw cms::Exception("ProcLikelihood")
00619                                 << "Expected categories tag in train file."
00620                                 << std::endl;
00621 
00622                 unsigned int i = 0;
00623                 for(DOMNode *subNode = node->getFirstChild();
00624                     subNode; subNode = subNode->getNextSibling()) {
00625                         if (subNode->getNodeType() != DOMNode::ELEMENT_NODE)
00626                                 continue;
00627 
00628                         if (i >= nCategories)
00629                                 throw cms::Exception("ProcLikelihood")
00630                                         << "Too many categories in train "
00631                                            "file." << std::endl;
00632 
00633                         if (std::strcmp(XMLSimpleStr(subNode->getNodeName()),
00634                                         "category") != 0)
00635                                 throw cms::Exception("ProcLikelihood")
00636                                         << "Expected category tag in train "
00637                                            "file." << std::endl;
00638 
00639                         elem = static_cast<DOMElement*>(subNode);
00640 
00641                         sigSum[i] = XMLDocument::readAttribute<double>(
00642                                                         elem, "signal");
00643                         bkgSum[i] = XMLDocument::readAttribute<double>(
00644                                                         elem, "background");
00645                         i++;
00646                 }
00647                 if (i < nCategories)
00648                         throw cms::Exception("ProcLikelihood")
00649                                 << "Too few categories in train file."
00650                                 << std::endl;
00651 
00652                 break;
00653         }
00654 
00655         std::map<Id, SigBkg*> pdfMap;
00656 
00657         for(std::vector<SigBkg>::iterator iter = pdfs.begin();
00658             iter != pdfs.end(); ++iter) {
00659                 SigBkg *ptr = &*iter;
00660                 unsigned int idx = iter - pdfs.begin();
00661                 unsigned int catIdx = idx % nCategories;
00662                 unsigned int varIdx = idx / nCategories;
00663                 if (categoryIdx >= 0 && (int)varIdx >= categoryIdx)
00664                         varIdx++;
00665                 const SourceVariable *var = getInputs().get()[varIdx];
00666                 Id id(var->getSource()->getName(), var->getName(), catIdx);
00667 
00668                 pdfMap[id] = ptr;
00669         }
00670 
00671         std::vector<SigBkg>::iterator cur = pdfs.begin();
00672 
00673         for(node = node->getNextSibling();
00674             node; node = node->getNextSibling()) {
00675                 if (node->getNodeType() != DOMNode::ELEMENT_NODE)
00676                         continue;
00677 
00678                 if (std::strcmp(XMLSimpleStr(node->getNodeName()),
00679                                 "sigbkg") != 0)
00680                         throw cms::Exception("ProcLikelihood")
00681                                 << "Expected sigbkg tag in train file."
00682                                 << std::endl;
00683                 elem = static_cast<DOMElement*>(node);
00684 
00685                 SigBkg *pdf = 0;
00686                 switch(version) {
00687                     case 1:
00688                         if (cur == pdfs.end())
00689                                 throw cms::Exception("ProcLikelihood")
00690                                         << "Superfluous SigBkg in train data."
00691                                         << std::endl;
00692                         pdf = &*cur++;
00693                         break;
00694                     case 2: {
00695                         Id id(XMLDocument::readAttribute<std::string>(
00696                                                         elem, "source"),
00697                               XMLDocument::readAttribute<std::string>(
00698                                                         elem, "name"),
00699                               XMLDocument::readAttribute<unsigned int>(
00700                                                         elem, "category", 0));
00701                         std::map<Id, SigBkg*>::const_iterator pos =
00702                                                         pdfMap.find(id);
00703                         if (pos == pdfMap.end())
00704                                 continue;
00705                         else
00706                                 pdf = pos->second;
00707                     }   break;
00708                 }
00709 
00710                 for(node = elem->getFirstChild();
00711                     node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00712                     node = node->getNextSibling());
00713                 DOMElement *elemSig =
00714                                 node ? static_cast<DOMElement*>(node) : 0;
00715 
00716                 for(node = node->getNextSibling();
00717                     node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00718                     node = node->getNextSibling());
00719                 while(node && node->getNodeType() != DOMNode::ELEMENT_NODE)
00720                         node = node->getNextSibling();
00721                 DOMElement *elemBkg =
00722                                 node ? static_cast<DOMElement*>(node) : 0;
00723 
00724                 for(node = node->getNextSibling();
00725                     node && node->getNodeType() != DOMNode::ELEMENT_NODE;
00726                     node = node->getNextSibling());
00727                 if (node)
00728                         throw cms::Exception("ProcLikelihood")
00729                                 << "Superfluous tags in sigbkg train data."
00730                                 << std::endl;
00731 
00732                 xmlParsePDF(pdf->signal, elemSig);
00733                 xmlParsePDF(pdf->background, elemBkg);
00734 
00735                 pdf->iteration = ITER_DONE;
00736 
00737                 node = elem;
00738         }
00739 
00740         if (version == 1 && cur != pdfs.end())
00741                 throw cms::Exception("ProcLikelihood")
00742                         << "Missing SigBkg in train data." << std::endl;
00743 
00744         iteration = ITER_DONE;
00745         trained = true;
00746         for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
00747             iter != pdfs.end(); ++iter) {
00748                 if (iter->iteration != ITER_DONE) {
00749                         trained = false;
00750                         break;
00751                 }
00752         }
00753 
00754         return true;
00755 }
00756 
00757 static DOMElement *xmlStorePDF(DOMDocument *doc,
00758                                const ProcLikelihood::PDF &pdf)
00759 {
00760         DOMElement *elem = doc->createElement(XMLUniStr("pdf"));
00761 
00762         XMLDocument::writeAttribute(elem, "lower", pdf.range.min);
00763         XMLDocument::writeAttribute(elem, "upper", pdf.range.max);
00764 
00765         for(std::vector<double>::const_iterator iter =
00766             pdf.distr.begin(); iter != pdf.distr.end(); iter++) {
00767                 DOMElement *value = doc->createElement(XMLUniStr("value"));
00768                 elem->appendChild(value);       
00769 
00770                 XMLDocument::writeContent<double>(value, doc, *iter);
00771         }
00772 
00773         return elem;
00774 }
00775 
00776 void ProcLikelihood::save()
00777 {
00778         XMLDocument xml(trainer->trainFileName(this, "xml"), true);
00779         DOMDocument *doc = xml.createDocument("ProcLikelihood");
00780         XMLDocument::writeAttribute(doc->getDocumentElement(), "version", 2);
00781 
00782         DOMElement *elem = doc->createElement(XMLUniStr("categories"));
00783         xml.getRootNode()->appendChild(elem);
00784         for(unsigned int i = 0; i < nCategories; i++) {
00785                 DOMElement *category = doc->createElement(XMLUniStr("category"));
00786                 elem->appendChild(category);
00787                 XMLDocument::writeAttribute(category, "signal", sigSum[i]);
00788                 XMLDocument::writeAttribute(category, "background", bkgSum[i]);
00789         }
00790 
00791         for(std::vector<SigBkg>::const_iterator iter = pdfs.begin();
00792             iter != pdfs.end(); iter++) {
00793                 elem = doc->createElement(XMLUniStr("sigbkg"));
00794                 xml.getRootNode()->appendChild(elem);
00795 
00796                 unsigned int idx = iter - pdfs.begin();
00797                 unsigned int catIdx = idx % nCategories;
00798                 unsigned int varIdx = idx / nCategories;
00799                 if (categoryIdx >= 0 && (int)varIdx >= categoryIdx)
00800                         varIdx++;
00801                 const SourceVariable *var = getInputs().get()[varIdx];
00802                 XMLDocument::writeAttribute(elem, "source",
00803                                 (const char*)var->getSource()->getName());
00804                 XMLDocument::writeAttribute(elem, "name",
00805                                 (const char*)var->getName());
00806                 if (categoryIdx >= 0)
00807                         XMLDocument::writeAttribute(elem, "category", catIdx);
00808 
00809                 elem->appendChild(xmlStorePDF(doc, iter->signal));
00810                 elem->appendChild(xmlStorePDF(doc, iter->background));
00811         }
00812 }
00813 
00814 } // anonymous namespace

Generated on Tue Jun 9 17:41:28 2009 for CMSSW by  doxygen 1.5.4