CMS 3D CMS Logo

MVASteering_cfi.py
Go to the documentation of this file.
1 """
2  MVASteering.py
3  Author: Evan K. Friis, UC Davis (friis@physics.ucdavis.edu)
4 
5  Define the MVA configurations (ie TaNC) to be used in training/testing
6  - which neural net
7  - which algorithms (shrinkingConePFTauDecayModeProducer, etc)
8  Define locations of train/test ROOT files
9 """
10 
11 import sys
12 import os
# Get CMSSW base: every path defined below is built relative to it, so
# the script stops immediately when the environment is not set up.
try:
    Project_Area = os.environ["CMSSW_BASE"]
except KeyError:
    # Single-argument parenthesised form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("$CMSSW_BASE enviroment variable not set! Please run eval `scramv1 ru -[c]sh`")
    sys.exit(1)
19 
20 import FWCore.ParameterSet.Config as cms
21 import glob
22 
23 # Get definitions (w/ decay mode mapping) from the python definition file
25 
26 ####### USER PARAMETERS #######################################
27 #################################################################
28 
29 # Define lists of neural nets corresponding to a total configuration
30 # The Neural net objects used here (SingleNet, OneProngNoPiZero, etc) must be defined in
31 # the TauMVAConfigurations_cfi.py found in ../python
32 MVACollections = {}
33 
34 
35 # Use the Tau neural classifier configuration
36 MVACollections['TaNC'] = TaNC.value() # <--- defined in TauMVAConfigurations_cfi.py
37 
38 # non isolated, single net only
39 # MVACollections['SingleNet'] = SingleNetBasedTauID.value()
40 
41 # isolation applied, neural net for each decay mode
42 #MVACollections['MultiNetIso'] = MultiNetIso.value()
43 
44 # isolation applied, single neural net
45 #MVACollections['SingleNetIso'] = [SingleNetIso]
46 
47 # For training/evaluating on an isolated sample, define the isolated criteria here
48 IsolationCutForTraining = "Alt$(ChargedOutlierPt[0], 0) < 1.0 && Alt$(NeutralOutlierPt[0], 0) < 1.5" #no tracks above 1 GeV, no gammas above 1.5 GeV
49 
50 #Define the PFRecoTauDecayMode source to use (in the case of more than one separate directories will be created for each training sample)
51 myTauAlgorithms = ["shrinkingConePFTauDecayModeProducer"]
52 
53 # If true, output will be weighted such that the signal and background distributions have the same
54 # Pt-Eta distribution. To generate the weights file, run python BuildWeights.py
55 #UseWeights = True
56 UseWeights = True
57 
58 # If this is true, the weighted pt-eta distributions for each decay mode will be the same.
59 # otherwise, the pt-eta distribution of the entire training sample (post DM preselection)
60 # will be used.
61 WeightByIndividualDecayMode = False
62 
63 # If true, single isolated charged pions and three prongs
64 # with charge +-3 will not be included in the training sample
65 ExcludePrepassAndPrefail = False
66 
67 # If greater than zero, require that either the lead track or lead pion have pt greater
68 # than the value supplied
69 RequireLeadPionPt = 5.0
70 LeadPionRequirementString = "( (MainTrackPt > %f && MainTrackAngle < 0.1) || (Alt$(TrackPt[0],0) > %f && Alt$(TrackAngle[0], 20) < 0.1) || (Alt$(PiZeroPt[0], 0) > %f && Alt$(PiZeroAngle[0], 20) < 0.1) )" % (RequireLeadPionPt,RequireLeadPionPt,RequireLeadPionPt)
71 
72 """
73 Example of multiple algorithms
74 myTauAlgorithms = ["pfTauDecayModeHighEfficiency",
75  "pfTauDecayModeInsideOut"]
76 """
77 
78 # define locations of signal/background root files
79 TauTagToolsWorkingDirectory = os.path.join(Project_Area, "src/RecoTauTag/TauTagTools")
80 SignalRootDir = os.path.join(TauTagToolsWorkingDirectory, "test", "ztt")
81 BackgroundRootDir = os.path.join(TauTagToolsWorkingDirectory, "test", "qcd")
82 
83 #Globs to get files for training and evaluation. If you want to ensure different sets, you can do something like
84 # adding a requirement such as *[0123].root for training and *[4].root for testing (files not ending in four are used for training, files ending in four are used for testing)
85 SignalFileTrainingGlob = "%s/*[012356789].root" % SignalRootDir
86 BackgroundFileTrainingGlob = "%s/*[012356789].root" % BackgroundRootDir
87 #SignalFileTrainingGlob = "%s/*[0].root" % SignalRootDir
88 #BackgroundFileTrainingGlob = "%s/*[0].root" % BackgroundRootDir
89 
90 SignalFileTestingGlob = "%s/*4.root" % SignalRootDir
91 BackgroundFileTestingGlob = "%s/*4.root" % BackgroundRootDir
92 
93 #################################################################
94 ##### DO NOT MODIFY BELOW THIS LINE (experts only) #############
95 #################################################################
96 
def GetTrainingFile(computerName, anAlgo):
    """Return the path of the ``.mva`` file for one MVA/algorithm pair.

    The path is built, not checked for existence:
    ``<TauTagToolsWorkingDirectory>/test/TrainDir_<computerName>_<anAlgo>/<computerName>.mva``

    Args:
        computerName: name of the MVA computer; used for both the
            training directory name and the file name.
        anAlgo: name of the tau algorithm; used in the directory name.

    Returns:
        The joined path as a string.
    """
    trainingDirName = "TrainDir_%s_%s" % (computerName, anAlgo)
    mvaFileName = "%s.mva" % computerName
    return os.path.join(TauTagToolsWorkingDirectory, "test", trainingDirName, mvaFileName)
99 
# Find the unique MVA types to train.
# listOfMVANames maps each computer name to the first module seen with
# that name, so a module shared by several collections is kept once.
listOfMVANames = {}
# Only the collection contents are needed here, not the collection keys.
# .values() exists on both Python 2 and Python 3 dicts (iteritems() is
# Python 2 only).
for mvaCollection in MVACollections.values():
    for _mva in mvaCollection:
        name = _mva.computerName.value()
        if name not in listOfMVANames:
            listOfMVANames[name] = _mva

# Flat list of the unique modules, in the dict's iteration order.
myModules = list(listOfMVANames.values())
111 
112 SignalTrainFiles = glob.glob(SignalFileTrainingGlob)
113 BackgroundTrainFiles = glob.glob(BackgroundFileTrainingGlob)
114 
115 SignalTestingFiles = glob.glob(SignalFileTestingGlob)
116 BackgroundTestingFiles = glob.glob(BackgroundFileTestingGlob)
117 
# Catch dumb errors before we begin
def EverythingInItsRightPlace():
    """Check that the inputs needed for MVA training/testing are present.

    The checks run from the most specific cause to the most general one:
    the signal/background root-file directories, then the globbed
    training/testing file lists, then the XML file of every module in
    ``myModules``.

    Raises:
        IOError: if a root-file directory does not exist, if any of the
            four training/testing file lists is empty, or if the XML
            configuration file of an MVA cannot be found.
    """
    # Test the directories first: glob.glob() returns an empty list for a
    # directory that does not exist, so checking the file lists first
    # would always raise the generic "list is empty" error and the more
    # helpful directory messages could never be reached.
    if not os.path.exists(SignalRootDir):
        raise IOError("Signal root file directory (%s) does not exist! Have you created the MVA raw training data?" % SignalRootDir)
    if not os.path.exists(BackgroundRootDir):
        raise IOError("Background root file directory (%s) does not exist! Have you created the MVA raw training data?" % BackgroundRootDir)

    # All four lists must contain at least one file.
    if not (SignalTrainFiles and BackgroundTrainFiles and SignalTestingFiles and BackgroundTestingFiles):
        raise IOError("The signal/background root file training/testing file list is empty! Check the SignalFileTrainingGlob etc. in MVASteering.py")

    # Ensure that we have all the necessary XML files
    for aModule in myModules:
        computerName = aModule.computerName.value()  # convert to python string
        xmlFileLoc = os.path.join(TauTagToolsWorkingDirectory, "xml", "%s.xml" % computerName)
        if not os.path.exists(xmlFileLoc):
            raise IOError("Can't find xml configuration file for %s - please check that %s exists!" % (computerName, xmlFileLoc))
def GetTrainingFile(computerName, anAlgo)
DO NOT MODIFY BELOW THIS LINE (experts only) #############.
def EverythingInItsRightPlace()