CMS 3D CMS Logo

MVASteering_cfi.py
Go to the documentation of this file.
1 """
2  MVASteering.py
3  Author: Evan K. Friis, UC Davis (friis@physics.ucdavis.edu)
4 
5  Define the MVA configurations (ie TaNC) to be used in training/testing
6  - which neural net
7  - which algorithms (shrinkingConePFTauDecayModeProducer, etc)
8  Define locations of train/test ROOT files
9 """
10 
11 import sys
12 import six
13 import os
14 # Get CMSSW base
try:
    # Root of the working area; the signal/background paths further down
    # are built relative to this directory.
    Project_Area = os.environ["CMSSW_BASE"]
except KeyError:
    # A parenthesised single-string print behaves identically as the
    # Python 2 print statement and as the Python 3 print function.
    print("$CMSSW_BASE enviroment variable not set! Please run eval `scramv1 ru -[c]sh`")
    sys.exit(1)
20 
21 import FWCore.ParameterSet.Config as cms
22 import glob
23 
24 # Get definitions (w/ decay mode mapping) from the python definition file
26 
27 ####### USER PARAMETERS #######################################
28 #################################################################
29 
30 # Define lists of neural nets corresponding to a total configuration
31 # The Neural net objects used here (SingleNet, OneProngNoPiZero, etc) must be defined in
32 # the TauMVAConfigurations_cfi.py found in ../python
33 MVACollections = {}
34 
35 
36 # Use the Tau neural classifier configuration
37 MVACollections['TaNC'] = TaNC.value() # <--- defined in TauMVAConfigurations_cfi.py
38 
39 # non isolated, single net only
40 # MVACollections['SingleNet'] = SingleNetBasedTauID.value()
41 
42 # isolation applied, neural net for each decay mode
43 #MVACollections['MultiNetIso'] = MultiNetIso.value()
44 
45 # isolation applied, single neural net
46 #MVACollections['SingleNetIso'] = [SingleNetIso]
47 
48 # For training/evaluating on an isolated sample, define the isolated criteria here
49 IsolationCutForTraining = "Alt$(ChargedOutlierPt[0], 0) < 1.0 && Alt$(NeutralOutlierPt[0], 0) < 1.5" #no tracks above 1 GeV, no gammas above 1.5 GeV
50 
51 #Define the PFRecoTauDecayMode source to use (in the case of more than one, separate directories will be created for each training sample)
52 myTauAlgorithms = ["shrinkingConePFTauDecayModeProducer"]
53 
54 # If true, output will be weighted such that the signal and background distributions have the same
55 # Pt-Eta distribution. To generate the weights file, run python BuildWeights.py
56 #UseWeights = True
57 UseWeights = True
58 
59 # If this is true, the weighted pt-eta distributions for each decay mode will be the same.
60 # otherwise, the pt-eta distribution of the entire training sample (post DM preselection)
61 # will be used.
62 WeightByIndividualDecayMode = False
63 
64 # If true, single isolated charged pions and three prongs
65 # with charge +-3 will not be included in the training sample
66 ExcludePrepassAndPrefail = False
67 
68 # If greater than zero, require that either the lead track or lead pion have pt greater
69 # than the value supplied
70 RequireLeadPionPt = 5.0
71 LeadPionRequirementString = "( (MainTrackPt > %f && MainTrackAngle < 0.1) || (Alt$(TrackPt[0],0) > %f && Alt$(TrackAngle[0], 20) < 0.1) || (Alt$(PiZeroPt[0], 0) > %f && Alt$(PiZeroAngle[0], 20) < 0.1) )" % (RequireLeadPionPt,RequireLeadPionPt,RequireLeadPionPt)
72 
73 """
74 Example of multiple algorithms
75 myTauAlgorithms = ["pfTauDecayModeHighEfficiency",
76  "pfTauDecayModeInsideOut"]
77 """
78 
79 # define locations of signal/background root files
80 TauTagToolsWorkingDirectory = os.path.join(Project_Area, "src/RecoTauTag/TauTagTools")
81 SignalRootDir = os.path.join(TauTagToolsWorkingDirectory, "test", "ztt")
82 BackgroundRootDir = os.path.join(TauTagToolsWorkingDirectory, "test", "qcd")
83 
84 #Globs to get files for training and evaluation. If you want to ensure different sets, you can do something like
85 # add a requirement such as *[0123].root for training and *[4].root for testing. (files not ending in four used for training, ending in four used for testing)
86 SignalFileTrainingGlob = "%s/*[012356789].root" % SignalRootDir
87 BackgroundFileTrainingGlob = "%s/*[012356789].root" % BackgroundRootDir
88 #SignalFileTrainingGlob = "%s/*[0].root" % SignalRootDir
89 #BackgroundFileTrainingGlob = "%s/*[0].root" % BackgroundRootDir
90 
91 SignalFileTestingGlob = "%s/*4.root" % SignalRootDir
92 BackgroundFileTestingGlob = "%s/*4.root" % BackgroundRootDir
93 
94 #################################################################
95 ##### DO NOT MODIFY BELOW THIS LINE (experts only) #############
96 #################################################################
97 
def GetTrainingFile(computerName, anAlgo):
    """Build the path of the .mva file for one computer/algorithm pair.

    The result is
    <TauTagToolsWorkingDirectory>/test/TrainDir_<computerName>_<anAlgo>/<computerName>.mva

    computerName -- name used both in the directory name and as the file stem
    anAlgo       -- algorithm name used as the directory-name suffix
    """
    trainDirName = "TrainDir_%s_%s" % (computerName, anAlgo)
    mvaFileName = "%s.mva" % computerName
    return os.path.join(TauTagToolsWorkingDirectory, "test", trainDirName, mvaFileName)
100 
101 #Find the unique mva types to train
# Map each distinct computer name to the first module that carries it,
# looking through every configured collection.
listOfMVANames = {}
for name, mvaCollection in six.iteritems(MVACollections):
    for _mva in mvaCollection:
        name = _mva.computerName.value()
        # setdefault only stores when the name is not present yet, so a
        # later module with the same name never replaces an earlier one.
        listOfMVANames.setdefault(name, _mva)

# Flat list of the unique modules, in the dictionary's iteration order.
myModules = []
for name, _mva in six.iteritems(listOfMVANames):
    myModules.append(_mva)
112 
113 SignalTrainFiles = glob.glob(SignalFileTrainingGlob)
114 BackgroundTrainFiles = glob.glob(BackgroundFileTrainingGlob)
115 
116 SignalTestingFiles = glob.glob(SignalFileTestingGlob)
117 BackgroundTestingFiles = glob.glob(BackgroundFileTestingGlob)
118 
119 # Catch dumb errors before we begin
121  if not len(SignalTrainFiles) or not len(BackgroundTrainFiles) or not len(SignalTestingFiles) or not len(BackgroundTestingFiles):
122  raise IOError("The signal/background root file training/testing file list is empty! Check the SignalFileTrainingGlob etc. in MVASteering.py")
123 
124  # Ensure that we have all the necessary XML files
125  for aModule in myModules:
126  computerName = aModule.computerName.value() #conver to python string
127  xmlFileLoc = os.path.join(TauTagToolsWorkingDirectory, "xml", "%s.xml" % computerName)
128  if not os.path.exists(xmlFileLoc):
129  raise IOError("Can't find xml configuration file for %s - please check that %s exists!" % (computerName, xmlFileLoc))
130 
131  if not os.path.exists(SignalRootDir):
132  raise IOError("Signal root file directory (%s) does not exist! Have you created the MVA raw training data?" % SignalRootDir)
133  if not os.path.exists(BackgroundRootDir):
134  raise IOError("Background root file directory (%s) does not exist! Have you created the MVA raw training data?" % BackgroundRootDir)
def GetTrainingFile(computerName, anAlgo)
DO NOT MODIFY BELOW THIS LINE (experts only) #############.
def EverythingInItsRightPlace()