CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Attributes | Private Member Functions | Private Attributes
PhysicsTools::MVATrainer Class Reference

#include <MVATrainer.h>

Classes

struct  CalibratedProcessor
 

Public Member Functions

TrainerMonitoring::Module * bookMonitor (const std::string &name)
 
void doneTraining (Calibration::MVAComputer *trainCalibration) const
 
Calibration::MVAComputer * getCalibration () const
 
const std::string & getName () const
 
Calibration::MVAComputer * getTrainCalibration () const
 
void loadState ()
 
 MVATrainer (const std::string &fileName, bool useXSLT=false, const char *styleSheet=0)
 
void saveState ()
 
void setAutoSave (bool autoSave)
 
void setCleanup (bool cleanup)
 
void setCrossValidation (double split)
 
void setMonitoring (bool monitoring)
 
void setRandomSeed (UInt_t seed)
 
std::string trainFileName (const TrainProcessor *proc, const std::string &ext, const std::string &arg="") const
 
 ~MVATrainer ()
 

Static Public Attributes

static const AtomicId kTargetId
 
static const AtomicId kWeightId
 

Private Member Functions

void connectProcessors (Calibration::MVAComputer *calib, const std::vector< CalibratedProcessor > &procs, bool withTarget) const
 
SourceVariable * createVariable (Source *source, AtomicId name, Variable::Flags flags)
 
void fillInputVars (SourceVariableSet &vars, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
 
void fillOutputVars (SourceVariableSet &vars, Source *source, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
 
std::vector< AtomicId > findFinalProcessors () const
 
void findUntrainedComputers (std::vector< AtomicId > &compute, std::vector< AtomicId > &train) const
 
SourceVariable * getVariable (AtomicId source, AtomicId name) const
 
void makeProcessor (XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, AtomicId id, const char *name)
 
Calibration::MVAComputer * makeTrainCalibration (const AtomicId *compute, const AtomicId *train) const
 

Private Attributes

double crossValidation
 
bool doAutoSave
 
bool doCleanup
 
bool doMonitoring
 
Source * input
 
std::unique_ptr< TrainerMonitoring > monitoring
 
std::string name
 
TrainProcessor * output
 
std::vector< AtomicId > processors
 
UInt_t randomSeed
 
std::map< AtomicId, Source * > sources
 
std::string trainFileMask
 
std::vector< SourceVariable * > variables
 
std::unique_ptr< XMLDocument > xml
 

Detailed Description

Definition at line 27 of file MVATrainer.h.

Constructor & Destructor Documentation

PhysicsTools::MVATrainer::MVATrainer ( const std::string &  fileName,
bool  useXSLT = false,
const char *  styleSheet = 0 
)

Definition at line 422 of file MVATrainer.cc.

References ws_sso_content_reader::content, createVariable(), HTMLExport::elem(), PhysicsTools::escape(), Exception, fillInputVars(), fillOutputVars(), PhysicsTools::Variable::FLAG_NONE, PhysicsTools::Variable::FLAG_OPTIONAL, edm::FileInPath::fullPath(), PhysicsTools::Source::getInputs(), input, PhysicsTools::SourceVariableSet::kTarget, kTargetId, PhysicsTools::SourceVariableSet::kWeight, kWeightId, makeProcessor(), name, output, PFJetToCaloProducer_cfi::Source, sources, AlCaHLTBitMon_QueryRunRegistry::string, trainFileMask, and xml.

423  :
424  input(0), output(0), name("MVATrainer"),
425  doAutoSave(true), doCleanup(false),
426  doMonitoring(false), randomSeed(65539), crossValidation(0.0)
427 {
428  if (useXSLT) {
429  std::string sheet;
430  if (!styleSheet)
431  sheet = edm::FileInPath(
432  "PhysicsTools/MVATrainer/data/MVATrainer.xsl")
433  .fullPath();
434  else
435  sheet = styleSheet;
436 
437  std::string preproc = "xsltproc --xinclude " + escape(sheet) +
438  " " + escape(fileName);
439  xml.reset(new XMLDocument(fileName, preproc));
440  } else
441  xml.reset(new XMLDocument(fileName));
442 
443  DOMNode *node = xml->getRootNode();
444 
445  if (std::strcmp(XMLSimpleStr(node->getNodeName()), "MVATrainer") != 0)
446  throw cms::Exception("MVATrainer")
447  << "Invalid XML root node." << std::endl;
448 
449  enum State {
450  STATE_GENERAL,
451  STATE_FIRST,
452  STATE_MIDDLE,
453  STATE_LAST
454  } state = STATE_GENERAL;
455 
456  for(node = node->getFirstChild();
457  node; node = node->getNextSibling()) {
458  if (node->getNodeType() != DOMNode::ELEMENT_NODE)
459  continue;
460 
461  std::string name = XMLSimpleStr(node->getNodeName());
462  DOMElement *elem = static_cast<DOMElement*>(node);
463 
464  switch(state) {
465  case STATE_GENERAL: {
466  if (name != "general")
467  throw cms::Exception("MVATrainer")
468  << "Expected general config as first "
469  "tag." << std::endl;
470 
471  for(DOMNode *subNode = elem->getFirstChild();
472  subNode; subNode = subNode->getNextSibling()) {
473  if (subNode->getNodeType() !=
474  DOMNode::ELEMENT_NODE)
475  continue;
476 
477  if (std::strcmp(XMLSimpleStr(
478  subNode->getNodeName()), "option") != 0)
479  throw cms::Exception("MVATrainer")
480  << "Expected option tag."
481  << std::endl;
482 
483  elem = static_cast<DOMElement*>(subNode);
484  name = XMLDocument::readAttribute<std::string>(
485  elem, "name");
486  std::string content = XMLSimpleStr(
487  elem->getTextContent());
488 
489  if (name == "id")
490  this->name = content;
491  else if (name == "trainfiles")
492  trainFileMask = content;
493  else
494  throw cms::Exception("MVATrainer")
495  << "Unknown option \""
496  << name << "\"." << std::endl;
497  }
498 
499  state = STATE_FIRST;
500  } break;
501  case STATE_FIRST: {
502  if (name != "input")
503  throw cms::Exception("MVATrainer")
504  << "Expected input config as second "
505  "tag." << std::endl;
506 
507  AtomicId id = XMLDocument::readAttribute<std::string>(
508  elem, "id");
509  input = new Source(id, true);
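510  input->getOutputs().append(
511  createVariable(input, kTargetId,
512  Variable::FLAG_NONE),
513  SourceVariableSet::kTarget);
514  input->getOutputs().append(
515  createVariable(input, kWeightId,
516  Variable::FLAG_OPTIONAL),
517  SourceVariableSet::kWeight);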
518  sources.insert(std::make_pair(id, input));
519  fillOutputVars(input->getOutputs(), input, elem);
520 
521  state = STATE_MIDDLE;
522  } break;
523  case STATE_MIDDLE: {
524  if (name == "output") {
525  AtomicId zero;
526  output = new TrainProcessor("output",
527  &zero, this);
528  fillInputVars(output->getInputs(), elem);
529  state = STATE_LAST;
530  continue;
531  } else if (name != "processor")
532  throw cms::Exception("MVATrainer")
533  << "Unexpected tag after input "
534  "config." << std::endl;
535 
536  AtomicId id = XMLDocument::readAttribute<std::string>(
537  elem, "id");
538  std::string name =
539  XMLDocument::readAttribute<std::string>(
540  elem, "name");
541 
542  makeProcessor(elem, id, name.c_str());
543  } break;
544  case STATE_LAST:
545  throw cms::Exception("MVATrainer")
546  << "Unexpected tag found after output."
547  << std::endl;
548  break;
549  }
550  }
551 
552  if (state == STATE_FIRST)
553  throw cms::Exception("MVATrainer")
554  << "Expected input variable config." << std::endl;
555  else if (state == STATE_MIDDLE)
556  throw cms::Exception("MVATrainer")
557  << "Expected output variable config." << std::endl;
558 
559  if (trainFileMask.empty())
560  trainFileMask = this->name + "_%s%s.%s";
561 }
static std::string escape(const std::string &in)
Definition: MVATrainer.cc:405
static const AtomicId kTargetId
Definition: MVATrainer.h:59
const SourceVariableSet & getInputs() const
Definition: Source.h:26
void fillOutputVars(SourceVariableSet &vars, Source *source, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
Definition: MVATrainer.cc:838
static const AtomicId kWeightId
Definition: MVATrainer.h:60
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:18
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
std::unique_ptr< XMLDocument > xml
Definition: MVATrainer.h:107
void fillInputVars(SourceVariableSet &vars, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
Definition: MVATrainer.cc:746
void makeProcessor(XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *elem, AtomicId id, const char *name)
Definition: MVATrainer.cc:622
std::string trainFileMask
Definition: MVATrainer.h:108
State
Definition: hltDiff.cc:287
TrainProcessor * output
Definition: MVATrainer.h:104
std::string fullPath() const
Definition: FileInPath.cc:184
SourceVariable * createVariable(Source *source, AtomicId name, Variable::Flags flags)
Definition: MVATrainer.cc:734
PhysicsTools::MVATrainer::~MVATrainer ( )

Definition at line 563 of file MVATrainer.cc.

References PhysicsTools::TrainProcessor::cleanup(), doCleanup, monitoring, output, proc, sources, and variables.

564 {
565  if (monitoring.get())
566  monitoring->write();
567 
568  for(std::map<AtomicId, Source*>::const_iterator iter = sources.begin();
569  iter != sources.end(); iter++) {
570  TrainProcessor *proc =
571  dynamic_cast<TrainProcessor*>(iter->second);
572 
573  if (proc && doCleanup)
574  proc->cleanup();
575 
576  delete iter->second;
577  }
578  delete output;
579  std::for_each(variables.begin(), variables.end(),
580  deleter<SourceVariable>());
581 }
TrainProcessor *const proc
Definition: MVATrainer.cc:101
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
std::unique_ptr< TrainerMonitoring > monitoring
Definition: MVATrainer.h:106
std::vector< SourceVariable * > variables
Definition: MVATrainer.h:101
TrainProcessor * output
Definition: MVATrainer.h:104

Member Function Documentation

TrainerMonitoring::Module * PhysicsTools::MVATrainer::bookMonitor ( const std::string &  name)

Definition at line 710 of file MVATrainer.cc.

References doMonitoring, MillePedeFileConverter_cfg::fileName, monitoring, PhysicsTools::stdStringPrintf(), AlCaHLTBitMon_QueryRunRegistry::string, and trainFileMask.

Referenced by PhysicsTools::TrainProcessor::doTrainBegin(), and getName().

711 {
712  if (!doMonitoring)
713  return 0;
714 
715  if (!monitoring.get()) {
716  std::string fileName =
717  stdStringPrintf(trainFileMask.c_str(),
718  "monitoring", "", "root");
719  monitoring.reset(new TrainerMonitoring(fileName));
720  }
721 
722  return monitoring->book(name);
723 }
std::unique_ptr< TrainerMonitoring > monitoring
Definition: MVATrainer.h:106
std::string trainFileMask
Definition: MVATrainer.h:108
static std::string stdStringPrintf(const char *format,...)
Definition: MVATrainer.cc:183
void PhysicsTools::MVATrainer::connectProcessors ( Calibration::MVAComputer *  calib,
const std::vector< CalibratedProcessor > &  procs,
bool  withTarget 
) const
private

Definition at line 885 of file MVATrainer.cc.

References PhysicsTools::Calibration::MVAComputer::addProcessor(), calib, PhysicsTools::Calibration::convert(), Exception, PhysicsTools::SourceVariableSet::get(), PhysicsTools::Variable::getFlags(), PhysicsTools::Source::getInputs(), PhysicsTools::Variable::getName(), PhysicsTools::Source::getOutputs(), mps_fire::i, input, PhysicsTools::Calibration::MVAComputer::inputSet, plotBeamSpotDB::last, genParticles_cff::map, PhysicsTools::Calibration::Variable::name, output, PhysicsTools::Calibration::MVAComputer::output, PhysicsTools::SourceVariableSet::size(), findQualityFiles::size, JetChargeProducer_cfi::var, and variables.

Referenced by getCalibration(), and makeTrainCalibration().

888 {
889  std::map<SourceVariable*, unsigned int> vars;
890  unsigned int size = 0;
891 
892  MVATrainerComputer *trainCalib =
893  dynamic_cast<MVATrainerComputer*>(calib);
894 
895  for(unsigned int i = 0;
896  i < input->getOutputs().size(true); i++) {
897  if (i < 2 && !withTarget)
898  continue;
899 
900  SourceVariable *var = variables[i];
901  vars[var] = size++;
902 
903  Calibration::Variable calibVar;
904  calibVar.name = (const char*)var->getName();
905  calib->inputSet.push_back(calibVar);
906  if (trainCalib)
907  trainCalib->addFlag(var->getFlags());
908  }
909 
910  for(std::vector<CalibratedProcessor>::const_iterator iter =
911  procs.begin(); iter != procs.end(); iter++) {
912  bool isInterceptor = dynamic_cast<BaseInterceptor*>(
913  iter->calib) != 0;
914 
915  BitSet inputSet(size);
916 
917  unsigned int last = 0;
918  std::vector<SourceVariable*> inoutVars;
919  if (iter->processor)
920  inoutVars = iter->processor->getInputs().get(
921  isInterceptor);
922  for(std::vector<SourceVariable*>::const_iterator iter2 =
923  inoutVars.begin(); iter2 != inoutVars.end(); iter2++) {
924  std::map<SourceVariable*,
925  unsigned int>::const_iterator pos =
926  vars.find(*iter2);
927 
928  assert(pos != vars.end());
929 
930  if (pos->second < last)
931  throw cms::Exception("MVATrainer")
932  << "Input variables not declared "
933  "in order of appearance in \""
934  << (const char*)iter->processor->getName()
935  << "\"." << std::endl;
936 
937  inputSet[last = pos->second] = true;
938  }
939 
940  assert(!isInterceptor || withTarget);
941 
942  iter->calib->inputVars = Calibration::convert(inputSet);
943 
944  calib->output = size;
945 
946  if (isInterceptor) {
947  size++;
948  continue;
949  }
950 
951  calib->addProcessor(iter->calib);
952 
953  inoutVars = iter->processor->getOutputs().get();
954  for(std::vector<SourceVariable*>::const_iterator iter =
955  inoutVars.begin(); iter != inoutVars.end(); iter++) {
956 
957  vars[*iter] = size++;
958  }
959  }
960 
961  if (output->getInputs().size() != 1)
962  throw cms::Exception("MVATrainer")
963  << "Exactly one output variable has to be specified."
964  << std::endl;
965 
966  SourceVariable *outVar = output->getInputs().get()[0];
967  std::map<SourceVariable*, unsigned int>::const_iterator pos =
968  vars.find(outVar);
969  if (pos != vars.end())
970  calib->output = pos->second;
971 }
size
Write out results.
const SourceVariableSet & getInputs() const
Definition: Source.h:26
MVATrainerComputer * calib
Definition: MVATrainer.cc:64
const SourceVariableSet & getOutputs() const
Definition: Source.h:27
std::vector< SourceVariable * > get(bool withMagic=false) const
size_type size(bool withMagic=false) const
std::vector< SourceVariable * > variables
Definition: MVATrainer.h:101
PhysicsTools::BitSet convert(const BitSet &bitSet)
constructs BitSet container from persistent representation
Definition: BitSet.cc:38
TrainProcessor * output
Definition: MVATrainer.h:104
SourceVariable * PhysicsTools::MVATrainer::createVariable ( Source *  source,
AtomicId  name,
Variable::Flags  flags 
)
private

Definition at line 734 of file MVATrainer.cc.

References PhysicsTools::Source::getName(), getVariable(), name, JetChargeProducer_cfi::var, and variables.

Referenced by fillOutputVars(), and MVATrainer().

736 {
737  SourceVariable *var = getVariable(source->getName(), name);
738  if (var)
739  return 0;
740 
741  var = new SourceVariable(source, name, flags);
742  variables.push_back(var);
743  return var;
744 }
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
SourceVariable * getVariable(AtomicId source, AtomicId name) const
Definition: MVATrainer.cc:725
std::vector< SourceVariable * > variables
Definition: MVATrainer.h:101
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::doneTraining ( Calibration::MVAComputer *  trainCalibration) const

Definition at line 1102 of file MVATrainer.cc.

References Exception.

Referenced by setCrossValidation().

1103 {
1104  MVATrainerComputer *calib =
1105  dynamic_cast<MVATrainerComputer*>(trainCalibration);
1106 
1107  if (!calib)
1108  throw cms::Exception("MVATrainer")
1109  << "Invalid training calibration passed to "
1110  "doneTraining()" << std::endl;
1111 
1112  calib->done();
1113 }
void PhysicsTools::MVATrainer::fillInputVars ( SourceVariableSet &  vars,
XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *  xml 
)
private

Definition at line 746 of file MVATrainer.cc.

References PhysicsTools::SourceVariableSet::append(), HTMLExport::elem(), Exception, spr::find(), PhysicsTools::Source::getOutput(), getVariable(), input, PhysicsTools::SourceVariableSet::kRegular, PhysicsTools::SourceVariableSet::kTarget, kTargetId, PhysicsTools::SourceVariableSet::kWeight, kWeightId, gen::n, name, source, edmPickEvents::target, tmp, JetChargeProducer_cfi::var, and variables.

Referenced by makeProcessor(), and MVATrainer().

748 {
749  std::vector<SourceVariable*> tmp;
750  SourceVariable *target = 0;
751  SourceVariable *weight = 0;
752 
753  for(DOMNode *node = xml->getFirstChild(); node;
754  node = node->getNextSibling()) {
755  if (node->getNodeType() != DOMNode::ELEMENT_NODE)
756  continue;
757 
758  if (std::strcmp(XMLSimpleStr(node->getNodeName()), "var") != 0)
759  throw cms::Exception("MVATrainer")
760  << "Invalid input variable node." << std::endl;
761 
762  DOMElement *elem = static_cast<DOMElement*>(node);
763 
764  AtomicId source = XMLDocument::readAttribute<std::string>(
765  elem, "source");
766  AtomicId name = XMLDocument::readAttribute<std::string>(
767  elem, "name");
768 
769  SourceVariable *var = getVariable(source, name);
770  if (!var)
771  throw cms::Exception("MVATrainer")
772  << "Input variable " << (const char*)source
773  << ":" << (const char*)name
774  << " not found." << std::endl;
775 
776  if (XMLDocument::readAttribute<bool>(elem, "target", false)) {
777  if (target)
778  throw cms::Exception("MVATrainer")
779  << "Target variable defined twice"
780  << std::endl;
781  target = var;
782  }
783  if (XMLDocument::readAttribute<bool>(elem, "weight", false)) {
784  if (weight)
785  throw cms::Exception("MVATrainer")
786  << "Weight variable defined twice"
787  << std::endl;
788  weight = var;
789  }
790 
791  tmp.push_back(var);
792  }
793 
794  if (!weight) {
795  weight = input->getOutput(kWeightId);
796  assert(weight);
797  tmp.insert(tmp.begin() +
798  (target == input->getOutput(kTargetId)),
799  1, weight);
800  }
801  if (!target) {
802  target = input->getOutput(kTargetId);
803  assert(target);
804  tmp.insert(tmp.begin(), 1, target);
805  }
806 
807  unsigned int n = 0;
808  for(std::vector<SourceVariable*>::const_iterator iter = variables.begin();
809  iter != variables.end(); iter++) {
810  std::vector<SourceVariable*>::const_iterator pos =
811  std::find(tmp.begin(), tmp.end(), *iter);
812  if (pos == tmp.end())
813  continue;
814 
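815  SourceVariableSet::Magic magic;
816  if (*iter == target)
817  magic = SourceVariableSet::kTarget;
818  else if (*iter == weight)
819  magic = SourceVariableSet::kWeight;
820  else
821  magic = SourceVariableSet::kRegular;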
822 
823  if (vars.append(*iter, magic, pos - tmp.begin())) {
824  AtomicId source = (*iter)->getSource()->getName();
825  AtomicId name = (*iter)->getName();
826  throw cms::Exception("MVATrainer")
827  << "Input variable " << (const char*)source
828  << ":" << (const char*)name
829  << " defined twice." << std::endl;
830  }
831 
832  n++;
833  }
834 
835  assert(tmp.size() == n);
836 }
Definition: weight.py:1
SourceVariable * getVariable(AtomicId source, AtomicId name) const
Definition: MVATrainer.cc:725
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
static const AtomicId kTargetId
Definition: MVATrainer.h:59
SourceVariable * getOutput(AtomicId name) const
Definition: Source.h:21
static const AtomicId kWeightId
Definition: MVATrainer.h:60
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:18
std::unique_ptr< XMLDocument > xml
Definition: MVATrainer.h:107
std::vector< SourceVariable * > variables
Definition: MVATrainer.h:101
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::fillOutputVars ( SourceVariableSet &  vars,
Source *  source,
XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *  xml 
)
private

Definition at line 838 of file MVATrainer.cc.

References PhysicsTools::SourceVariableSet::append(), createVariable(), HTMLExport::elem(), Exception, PhysicsTools::Variable::FLAG_MULTIPLE, PhysicsTools::Variable::FLAG_NONE, PhysicsTools::Variable::FLAG_OPTIONAL, PhysicsTools::Source::getName(), PhysicsTools::isMagic(), name, and JetChargeProducer_cfi::var.

Referenced by makeProcessor(), and MVATrainer().

840 {
841  for(DOMNode *node = xml->getFirstChild(); node;
842  node = node->getNextSibling()) {
843  if (node->getNodeType() != DOMNode::ELEMENT_NODE)
844  continue;
845 
846  if (std::strcmp(XMLSimpleStr(node->getNodeName()), "var") != 0)
847  throw cms::Exception("MVATrainer")
848  << "Invalid output variable node."
849  << std::endl;
850 
851  DOMElement *elem = static_cast<DOMElement*>(node);
852 
853  AtomicId name = XMLDocument::readAttribute<std::string>(
854  elem, "name");
855  if (!name)
856  throw cms::Exception("MVATrainer")
857  << "Output variable tag missing name."
858  << std::endl;
859  if (isMagic(name))
860  throw cms::Exception("MVATrainer")
861  << "Cannot use magic variable names in output."
862  << std::endl;
863 
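864  Variable::Flags flags = Variable::FLAG_NONE;
865 
866  if (XMLDocument::readAttribute<bool>(elem, "optional", true))
867  flags = (Variable::Flags)
868  (flags | Variable::FLAG_OPTIONAL);
869 
870  if (XMLDocument::readAttribute<bool>(elem, "multiple", true))
871  flags = (Variable::Flags)
872  (flags | Variable::FLAG_MULTIPLE);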
873 
874  SourceVariable *var = createVariable(source, name, flags);
875  if (!var || vars.append(var))
876  throw cms::Exception("MVATrainer")
877  << "Output variable "
878  << (const char*)source->getName()
879  << ":" << (const char*)name
880  << " defined twice." << std::endl;
881  }
882 }
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:18
std::unique_ptr< XMLDocument > xml
Definition: MVATrainer.h:107
static std::string const source
Definition: EdmProvDump.cc:43
static bool isMagic(AtomicId id)
Definition: MVATrainer.cc:398
SourceVariable * createVariable(Source *source, AtomicId name, Variable::Flags flags)
Definition: MVATrainer.cc:734
std::vector< AtomicId > PhysicsTools::MVATrainer::findFinalProcessors ( ) const
private

Definition at line 1115 of file MVATrainer.cc.

References PhysicsTools::SourceVariableSet::get(), PhysicsTools::Source::inputs, PatBasicFWLiteJetAnalyzer_Selector_cfg::inputs, output, processors, mps_fire::result, source, and sources.

Referenced by getCalibration().

1116 {
1117  std::set<Source*> toCheck;
1118  toCheck.insert(output);
1119 
1120  std::set<Source*> done;
1121  while(!toCheck.empty()) {
1122  Source *source = *toCheck.begin();
1123  toCheck.erase(toCheck.begin());
1124 
1125  std::vector<SourceVariable*> inputs = source->inputs.get();
1126  for(std::vector<SourceVariable*>::const_iterator iter =
1127  inputs.begin(); iter != inputs.end(); ++iter) {
1128  source = (*iter)->getSource();
1129  if (done.insert(source).second)
1130  toCheck.insert(source);
1131  }
1132  }
1133 
1134  std::vector<AtomicId> result;
1135  for(std::vector<AtomicId>::const_iterator iter = processors.begin();
1136  iter != processors.end(); ++iter) {
1137  std::map<AtomicId, Source*>::const_iterator pos =
1138  sources.find(*iter);
1139  if (pos != sources.end() && done.count(pos->second))
1140  result.push_back(*iter);
1141  }
1142 
1143  return result;
1144 }
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
TrainProcessor * output
Definition: MVATrainer.h:104
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::findUntrainedComputers ( std::vector< AtomicId > &  compute,
std::vector< AtomicId > &  train 
) const
private

Definition at line 1195 of file MVATrainer.cc.

References doMonitoring, PhysicsTools::SourceVariableSet::get(), PhysicsTools::Source::getInputs(), input, PhysicsTools::Source::isTrained(), output, proc, processors, and sources.

Referenced by getTrainCalibration().

1197 {
1198  compute.clear();
1199  train.clear();
1200 
1201  std::set<Source*> trainedSources;
1202  trainedSources.insert(input);
1203 
1204  for(std::vector<AtomicId>::const_iterator iter =
1205  processors.begin(); iter != processors.end(); iter++) {
1206  std::map<AtomicId, Source*>::const_iterator pos =
1207  sources.find(*iter);
1208  assert(pos != sources.end());
1209  TrainProcessor *proc =
1210  dynamic_cast<TrainProcessor*>(pos->second);
1211  assert(proc);
1212 
1213  bool trainedDeps = true;
1214  std::vector<SourceVariable*> inputVars =
1215  proc->getInputs().get();
1216  for(std::vector<SourceVariable*>::const_iterator iter2 =
1217  inputVars.begin(); iter2 != inputVars.end(); iter2++) {
1218  if (trainedSources.find((*iter2)->getSource())
1219  == trainedSources.end()) {
1220  trainedDeps = false;
1221  break;
1222  }
1223  }
1224 
1225  if (!trainedDeps)
1226  continue;
1227 
1228  if (proc->isTrained()) {
1229  trainedSources.insert(proc);
1230  compute.push_back(proc->getName());
1231  } else
1232  train.push_back(proc->getName());
1233  }
1234 
1235  if (doMonitoring && !output->isTrained() &&
1236  trainedSources.find(output->getInputs().get()[0]->getSource())
1237  != trainedSources.end())
1238  train.push_back(kOutputId);
1239 }
bool isTrained() const
Definition: Source.h:24
TrainProcessor *const proc
Definition: MVATrainer.cc:101
const SourceVariableSet & getInputs() const
Definition: Source.h:26
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
std::vector< SourceVariable * > get(bool withMagic=false) const
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
def compute(min, max)
TrainProcessor * output
Definition: MVATrainer.h:104
static const AtomicId kOutputId("__OUTPUT__")
Calibration::MVAComputer * PhysicsTools::MVATrainer::getCalibration ( ) const

Definition at line 1146 of file MVATrainer.cc.

References begin, calib, connectProcessors(), end, spr::find(), findFinalProcessors(), foreach, createfilelist::int, proc, processors, source, and sources.

Referenced by PhysicsTools::MVATrainerContainerLooperImpl< Record_t >::produce(), setCrossValidation(), and PhysicsTools::TreeTrainer::train().

1147 {
1148  std::vector<CalibratedProcessor> processors;
1149 
1150  std::unique_ptr<Calibration::MVAComputer> calib(
1151  new Calibration::MVAComputer);
1152 
1153  std::vector<AtomicId> used = findFinalProcessors();
1154  for(std::vector<AtomicId>::const_iterator iter = used.begin();
1155  iter != used.end(); iter++) {
1156  std::map<AtomicId, Source*>::const_iterator pos =
1157  sources.find(*iter);
1158  assert(pos != sources.end());
1159  TrainProcessor *source =
1160  dynamic_cast<TrainProcessor*>(pos->second);
1161  assert(source);
1162  if (!source->isTrained())
1163  return 0;
1164 
1165  Calibration::VarProcessor *proc = source->getCalibration();
1166  if (!proc)
1167  continue;
1168 
1169  Calibration::ProcForeach *foreach =
1170  dynamic_cast<Calibration::ProcForeach*>(proc);
1171  if (foreach) {
1172  std::vector<AtomicId>::const_iterator begin =
1173  std::find(this->processors.begin(),
1174  this->processors.end(), *iter);
1175  assert(this->processors.end() - begin >
1176  (int)(foreach->nProcs + 1));
1177  ++begin;
1178  std::vector<AtomicId>::const_iterator end =
1179  begin + foreach->nProcs;
1180  foreach->nProcs = 0;
1181  for(std::vector<AtomicId>::const_iterator iter2 =
1182  iter; iter2 != used.end(); ++iter2)
1183  if (std::find(begin, end, *iter2) != end)
1184  foreach->nProcs++;
1185  }
1186 
1187  processors.push_back(CalibratedProcessor(source, proc));
1188  }
1189 
1190  connectProcessors(calib.get(), processors, false);
1191 
1192  return calib.release();
1193 }
TrainProcessor *const proc
Definition: MVATrainer.cc:101
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
MVATrainerComputer * calib
Definition: MVATrainer.cc:64
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
std::vector< AtomicId > findFinalProcessors() const
Definition: MVATrainer.cc:1115
#define end
Definition: vmac.h:37
void connectProcessors(Calibration::MVAComputer *calib, const std::vector< CalibratedProcessor > &procs, bool withTarget) const
Definition: MVATrainer.cc:885
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
#define begin
Definition: vmac.h:30
#define foreach
static std::string const source
Definition: EdmProvDump.cc:43
const std::string& PhysicsTools::MVATrainer::getName ( void  ) const
inline

Definition at line 53 of file MVATrainer.h.

References bookMonitor(), name, and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by plotting.Plot::draw().

53 { return name; }
Calibration::MVAComputer * PhysicsTools::MVATrainer::getTrainCalibration ( ) const

Definition at line 1241 of file MVATrainer.cc.

References bookConverter::compute(), findUntrainedComputers(), and makeTrainCalibration().

Referenced by PhysicsTools::TreeTrainer::iteration(), and setCrossValidation().

1242 {
1243  std::vector<AtomicId> compute, train;
1244  findUntrainedComputers(compute, train);
1245 
1246  if (train.empty())
1247  return 0;
1248 
1249  compute.push_back(0);
1250  train.push_back(0);
1251 
1252  return makeTrainCalibration(&compute.front(), &train.front());
1253 }
Calibration::MVAComputer * makeTrainCalibration(const AtomicId *compute, const AtomicId *train) const
Definition: MVATrainer.cc:974
def compute(min, max)
void findUntrainedComputers(std::vector< AtomicId > &compute, std::vector< AtomicId > &train) const
Definition: MVATrainer.cc:1195
SourceVariable * PhysicsTools::MVATrainer::getVariable ( AtomicId  source,
AtomicId  name 
) const
private

Definition at line 725 of file MVATrainer.cc.

References sources.

Referenced by createVariable(), and fillInputVars().

726 {
727  std::map<AtomicId, Source*>::const_iterator pos = sources.find(source);
728  if (pos == sources.end())
729  return 0;
730 
731  return pos->second->getOutput(name);
732 }
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::loadState ( )

Definition at line 583 of file MVATrainer.cc.

References processors, source, and sources.

Referenced by setCrossValidation().

584 {
585  for(std::vector<AtomicId>::const_iterator iter =
586  this->processors.begin();
587  iter != this->processors.end(); iter++) {
588  std::map<AtomicId, Source*>::const_iterator pos =
589  sources.find(*iter);
590  assert(pos != sources.end());
591  TrainProcessor *source =
592  dynamic_cast<TrainProcessor*>(pos->second);
593  assert(source);
594 
595  if (source->load())
596  edm::LogInfo("MVATrainer")
597  << source->getId() << " configuration for \""
598  << (const char*)source->getName()
599  << "\" loaded from file.";
600  }
601 }
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::makeProcessor ( XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *  elem,
AtomicId  id,
const char *  name 
)
private

Definition at line 622 of file MVATrainer.cc.

References PhysicsTools::ProcessRegistry< Base_t, CalibBase_t, Parent_t >::Factory::create(), HTMLExport::elem(), Exception, fillInputVars(), fillOutputVars(), proc, processors, sources, AlCaHLTBitMon_QueryRunRegistry::string, and GlobalPosition_Frontier_DevDB_cff::tag.

Referenced by MVATrainer().

623 {
624  DOMElement *xmlInput = 0;
625  DOMElement *xmlConfig = 0;
626  DOMElement *xmlOutput = 0;
627  DOMElement *xmlData = 0;
628 
629  static struct NameExpect {
630  const char *tag;
631  bool mandatory;
632  DOMElement **elem;
633  } const expect[] = {
634  { "input", true, &xmlInput },
635  { "config", true, &xmlConfig },
636  { "output", true, &xmlOutput },
637  { "data", false, &xmlData },
638  { 0, }
639  };
640 
641  const NameExpect *cur = expect;
642  for(DOMNode *node = elem->getFirstChild();
643  node; node = node->getNextSibling()) {
644  if (node->getNodeType() != DOMNode::ELEMENT_NODE)
645  continue;
646 
647  std::string tag = XMLSimpleStr(node->getNodeName());
648  DOMElement *elem = static_cast<DOMElement*>(node);
649 
650  if (!cur->tag)
651  throw cms::Exception("MVATrainer")
652  << "Superfluous tag " << tag
653  << "encountered in processor." << std::endl;
654  else if (tag != cur->tag && cur->mandatory)
655  throw cms::Exception("MVATrainer")
656  << "Expected tag " << cur->tag << ", got "
657  << tag << " instead in processor."
658  << std::endl;
659  else if (tag != cur->tag) {
660  cur++;
661  continue;
662  }
663  *(cur++)->elem = elem;
664  }
665 
666  while(cur->tag && !cur->mandatory)
667  cur++;
668  if (cur->tag)
669  throw cms::Exception("MVATrainer")
670  << "Unexpected end of processor configuration, "
671  << "expected tag " << cur->tag << "." << std::endl;
672 
673  std::unique_ptr<TrainProcessor> proc(
674  TrainProcessor::create(name, &id, this));
675  if (!proc.get())
676  throw cms::Exception("MVATrainer")
677  << "Variable processor trainer " << name
678  << " could not be instantiated. Most likely because"
679  " the trainer plugin for \"" << name << "\""
680  " does not exist." << std::endl;
681 
682  if (sources.find(id) != sources.end())
683  throw cms::Exception("MVATrainer")
684  << "Duplicate variable processor id "
685  << (const char*)id << "."
686  << std::endl;
687 
688  fillInputVars(proc->getInputs(), xmlInput);
689  fillOutputVars(proc->getOutputs(), proc.get(), xmlOutput);
690 
691  edm::LogInfo("MVATrainer")
692  << "Configuring " << (const char*)proc->getId()
693  << " \"" << (const char*)proc->getName() << "\".";
694  proc->configure(xmlConfig);
695 
696  sources.insert(std::make_pair(id, proc.release()));
697  processors.push_back(id);
698 }
TrainProcessor *const proc
Definition: MVATrainer.cc:101
void fillOutputVars(SourceVariableSet &vars, Source *source, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
Definition: MVATrainer.cc:838
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:18
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
static Base_t * create(const char *name, const CalibBase_t *calib, Parent_t *parent=0)
void fillInputVars(SourceVariableSet &vars, XERCES_CPP_NAMESPACE_QUALIFIER DOMElement *xml)
Definition: MVATrainer.cc:746
Calibration::MVAComputer * PhysicsTools::MVATrainer::makeTrainCalibration ( const AtomicId * compute,
const AtomicId * train 
) const
private

Definition at line 974 of file MVATrainer.cc.

References calib, bookConverter::compute(), connectProcessors(), crossValidation, doAutoSave, spr::find(), mps_fire::i, createfilelist::int, interceptors, gen::n, PhysicsTools::Calibration::ProcForeach::nProcs, output, proc, processors, randomSeed, source, and sources.

Referenced by getTrainCalibration().

976 {
977  std::map<AtomicId, TrainInterceptor*> interceptors;
978  std::vector<MVATrainerComputer::Interceptor> baseInterceptors;
979  std::vector<CalibratedProcessor> processors;
980 
981  BaseInterceptor *interceptor = new InitInterceptor;
982  baseInterceptors.push_back(std::make_pair(0, interceptor));
983  processors.push_back(CalibratedProcessor(0, interceptor));
984 
985  for(const AtomicId *iter = train; *iter; iter++) {
986  TrainProcessor *source;
987  if (*iter == kOutputId)
988  source = output;
989  else {
990  std::map<AtomicId, Source*>::const_iterator pos =
991  sources.find(*iter);
992  assert(pos != sources.end());
993  source = dynamic_cast<TrainProcessor*>(pos->second);
994  }
995  assert(source);
996 
997  interceptors[*iter] = new TrainInterceptor(source);
998  }
999 
1000  auto_cleaner<Calibration::VarProcessor> autoClean;
1001 
1002  std::set<AtomicId> done;
1003  for(const AtomicId *iter = compute; *iter; iter++) {
1004  if (done.erase(*iter))
1005  continue;
1006 
1007  std::map<AtomicId, Source*>::const_iterator pos =
1008  sources.find(*iter);
1009  assert(pos != sources.end());
1010  TrainProcessor *source =
1011  dynamic_cast<TrainProcessor*>(pos->second);
1012  assert(source);
1013  assert(source->isTrained());
1014 
1015  Calibration::VarProcessor *proc = source->getCalibration();
1016  if (!proc)
1017  continue;
1018 
1019  autoClean.add(proc);
1020  processors.push_back(CalibratedProcessor(source, proc));
1021 
1022  Calibration::ProcForeach *looper =
1023  dynamic_cast<Calibration::ProcForeach*>(proc);
1024  if (looper) {
1025  std::vector<AtomicId>::const_iterator pos2 =
1026  std::find(this->processors.begin(),
1027  this->processors.end(), *iter);
1028  assert(pos2 != this->processors.end());
1029  ++pos2;
1030  unsigned int n = 0;
1031  for(int i = 0; i < (int)looper->nProcs; ++i, ++pos2) {
1032  assert(pos2 != this->processors.end());
1033 
1034  const AtomicId *iter2 = compute;
1035  while(*iter2) {
1036  if (*iter2 == *pos2)
1037  break;
1038  iter2++;
1039  }
1040 
1041  if (*iter2) {
1042  n++;
1043  done.insert(*iter2);
1044  pos = sources.find(*iter2);
1045  assert(pos != sources.end());
1046  TrainProcessor *source =
1047  dynamic_cast<TrainProcessor*>(
1048  pos->second);
1049  assert(source);
1050  assert(source->isTrained());
1051 
1052  proc = source->getCalibration();
1053  if (proc) {
1054  autoClean.add(proc);
1055  processors.push_back(
1056  CalibratedProcessor(
1057  source, proc));
1058  }
1059  }
1060 
1061  std::map<AtomicId, TrainInterceptor*>::iterator
1062  pos3 = interceptors.find(*pos2);
1063  if (pos3 != interceptors.end()) {
1064  n++;
1065  baseInterceptors.push_back(
1066  std::make_pair(processors.size(),
1067  pos3->second));
1068  processors.push_back(
1069  CalibratedProcessor(
1070  pos3->second->getProcessor(),
1071  pos3->second));
1072  interceptors.erase(pos3);
1073  }
1074  }
1075 
1076  looper->nProcs = n;
1077  if (!n) {
1078  baseInterceptors.pop_back();
1079  processors.pop_back();
1080  }
1081  }
1082  }
1083 
1084  for(std::map<AtomicId, TrainInterceptor*>::const_iterator iter =
1085  interceptors.begin(); iter != interceptors.end(); ++iter) {
1086 
1087  TrainProcessor *proc = iter->second->getProcessor();
1088  baseInterceptors.push_back(std::make_pair(processors.size(),
1089  iter->second));
1090  processors.push_back(CalibratedProcessor(proc, iter->second));
1091  }
1092 
1093  std::unique_ptr<Calibration::MVAComputer> calib(
1094  new MVATrainerComputer(baseInterceptors, doAutoSave,
1095  randomSeed, crossValidation));
1096 
1097  connectProcessors(calib.get(), processors, true);
1098 
1099  return calib.release();
1100 }
TrainProcessor *const proc
Definition: MVATrainer.cc:101
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
Definition: looper.py:1
MVATrainerComputer * calib
Definition: MVATrainer.cc:64
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
void connectProcessors(Calibration::MVAComputer *calib, const std::vector< CalibratedProcessor > &procs, bool withTarget) const
Definition: MVATrainer.cc:885
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
std::vector< Interceptor > interceptors
Definition: MVATrainer.cc:134
def compute(min, max)
TrainProcessor * output
Definition: MVATrainer.h:104
static std::string const source
Definition: EdmProvDump.cc:43
static const AtomicId kOutputId("__OUTPUT__")
void PhysicsTools::MVATrainer::saveState ( )

Definition at line 603 of file MVATrainer.cc.

References doCleanup, processors, source, and sources.

Referenced by setCrossValidation().

604 {
605  doCleanup = false;
606 
607  for(std::vector<AtomicId>::const_iterator iter =
608  this->processors.begin();
609  iter != this->processors.end(); iter++) {
610  std::map<AtomicId, Source*>::const_iterator pos =
611  sources.find(*iter);
612  assert(pos != sources.end());
613  TrainProcessor *source =
614  dynamic_cast<TrainProcessor*>(pos->second);
615  assert(source);
616 
617  if (source->isTrained())
618  source->save();
619  }
620 }
std::vector< AtomicId > processors
Definition: MVATrainer.h:102
std::map< AtomicId, Source * > sources
Definition: MVATrainer.h:100
static std::string const source
Definition: EdmProvDump.cc:43
void PhysicsTools::MVATrainer::setAutoSave ( bool  autoSave)
inline

Definition at line 33 of file MVATrainer.h.

References doAutoSave.

33 { doAutoSave = autoSave; }
void PhysicsTools::MVATrainer::setCleanup ( bool  cleanup)
inline

Definition at line 34 of file MVATrainer.h.

References edm::cleanup(), and doCleanup.

34 { doCleanup = cleanup; }
static void cleanup(const Factory::MakerMap::value_type &v)
Definition: Factory.cc:12
void PhysicsTools::MVATrainer::setCrossValidation ( double  split)
inline

Definition at line 37 of file MVATrainer.h.

References crossValidation, doneTraining(), getCalibration(), getTrainCalibration(), loadState(), proc, saveState(), PhysicsTools::split(), AlCaHLTBitMon_QueryRunRegistry::string, and trainFileName().

Referenced by PhysicsTools::TreeTrainer::train().

static std::vector< std::string > split(const std::string line, char delim)
Definition: MLP.cc:18
void PhysicsTools::MVATrainer::setMonitoring ( bool  monitoring)
inline

Definition at line 35 of file MVATrainer.h.

References doMonitoring, and monitoring.

Referenced by PhysicsTools::TreeTrainer::train().

std::unique_ptr< TrainerMonitoring > monitoring
Definition: MVATrainer.h:106
void PhysicsTools::MVATrainer::setRandomSeed ( UInt_t  seed)
inline
std::string PhysicsTools::MVATrainer::trainFileName ( const TrainProcessor * proc,
const std::string &  ext,
const std::string &  arg = "" 
) const

Definition at line 700 of file MVATrainer.cc.

References PhysicsTools::Source::getName(), PhysicsTools::stdStringPrintf(), AlCaHLTBitMon_QueryRunRegistry::string, and trainFileMask.

Referenced by setCrossValidation().

703 {
704  std::string arg_ = arg.size() > 0 ? ("_" + arg) : "";
705  return stdStringPrintf(trainFileMask.c_str(),
706  (const char*)proc->getName(),
707  arg_.c_str(), ext.c_str());
708 }
TrainProcessor *const proc
Definition: MVATrainer.cc:101
A arg
Definition: Factorize.h:36
std::string trainFileMask
Definition: MVATrainer.h:108
Definition: memstream.h:15
static std::string stdStringPrintf(const char *format,...)
Definition: MVATrainer.cc:183

Member Data Documentation

double PhysicsTools::MVATrainer::crossValidation
private

Definition at line 115 of file MVATrainer.h.

Referenced by makeTrainCalibration(), and setCrossValidation().

bool PhysicsTools::MVATrainer::doAutoSave
private

Definition at line 110 of file MVATrainer.h.

Referenced by makeTrainCalibration(), and setAutoSave().

bool PhysicsTools::MVATrainer::doCleanup
private
bool PhysicsTools::MVATrainer::doMonitoring
private

Definition at line 112 of file MVATrainer.h.

Referenced by bookMonitor(), findUntrainedComputers(), and setMonitoring().

Source* PhysicsTools::MVATrainer::input
private

Definition at line 103 of file MVATrainer.h.

Referenced by connectProcessors(), fillInputVars(), findUntrainedComputers(), and MVATrainer().

const AtomicId PhysicsTools::MVATrainer::kTargetId
static
const AtomicId PhysicsTools::MVATrainer::kWeightId
static
std::unique_ptr<TrainerMonitoring> PhysicsTools::MVATrainer::monitoring
private

Definition at line 106 of file MVATrainer.h.

Referenced by bookMonitor(), setMonitoring(), and ~MVATrainer().

std::string PhysicsTools::MVATrainer::name
private
TrainProcessor* PhysicsTools::MVATrainer::output
private
std::vector<AtomicId> PhysicsTools::MVATrainer::processors
private
UInt_t PhysicsTools::MVATrainer::randomSeed
private

Definition at line 114 of file MVATrainer.h.

Referenced by makeTrainCalibration(), and setRandomSeed().

std::map<AtomicId, Source*> PhysicsTools::MVATrainer::sources
private
std::string PhysicsTools::MVATrainer::trainFileMask
private

Definition at line 108 of file MVATrainer.h.

Referenced by bookMonitor(), MVATrainer(), and trainFileName().

std::vector<SourceVariable*> PhysicsTools::MVATrainer::variables
private

Definition at line 101 of file MVATrainer.h.

Referenced by connectProcessors(), createVariable(), fillInputVars(), and ~MVATrainer().

std::unique_ptr<XMLDocument> PhysicsTools::MVATrainer::xml
private

Definition at line 107 of file MVATrainer.h.

Referenced by MVATrainer().