CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_14/src/RecoTauTag/TauTagTools/python/MVASteering_cfi.py

Go to the documentation of this file.
00001 """
00002         MVASteering.py
00003         Author: Evan K. Friis, UC Davis (friis@physics.ucdavis.edu)
00004 
00005         Define the MVA configurations (ie TaNC) to be used in training/testing
00006                 - which neural net 
00007                 - which algorithms (shrinkingConePFTauDecayModeProducer, etc)
00008         Define locations of train/test ROOT files
00009 """
00010 
00011 import sys
00012 import os
# Get CMSSW base.  Project_Area is used further down to build the TauTagTools
# working directory, so bail out right away if the environment is not set up.
try:
   Project_Area = os.environ["CMSSW_BASE"]
except KeyError:
   # The parenthesised single-argument form prints the same text under the
   # Python 2 print statement and the Python 3 print function.
   print("$CMSSW_BASE enviroment variable not set!  Please run eval `scramv1 ru -[c]sh`")
   sys.exit(1)
00019 
00020 import FWCore.ParameterSet.Config as cms
00021 import glob
00022 
00023 # Get definitions (w/ decay mode mapping) from the python definition file
00024 from RecoTauTag.TauTagTools.TauMVAConfigurations_cfi import *
00025 
00026 #######  USER PARAMETERS  #######################################
00027 #################################################################
00028 
00029 # Define lists of neural nets corresponding to a total configuration
00030 # The Neural net objects used here (SingleNet, OneProngNoPiZero, etc) must be defined in
00031 # the TauMVAConfigurations_cfi.py found in ../python
00032 MVACollections = {}
00033 
00034 
00035 # Use the Tau neural classifier configuration
00036 MVACollections['TaNC'] = TaNC.value()  # <--- defined in TauMVAConfigurations_cfi.py
00037 
00038 # non isolated, single net only
00039 # MVACollections['SingleNet'] = SingleNetBasedTauID.value()       
00040 
00041 # isolation applied, neural net for each decay mode
00042 #MVACollections['MultiNetIso'] = MultiNetIso.value()
00043 
00044 # isolation applied, single neural net 
00045 #MVACollections['SingleNetIso'] = [SingleNetIso]
00046 
00047 # For training/evaluating on an isolated sample, define the isolated criteria here
00048 IsolationCutForTraining = "Alt$(ChargedOutlierPt[0], 0) < 1.0 && Alt$(NeutralOutlierPt[0], 0) < 1.5" #no tracks above 1 GeV, no gammas above 1.5  GeV
00049 
00050 #Define the PFRecoTauDecayMode source(s) to use (if more than one is listed, a separate directory will be created for each training sample)
00051 myTauAlgorithms = ["shrinkingConePFTauDecayModeProducer"]
00052 
00053 # If true, output will be weighted such that the signal and background distributions have the same
00054 #  Pt-Eta distribution.  To generate the weights file, run python BuildWeights.py
00055 #UseWeights = True
00056 UseWeights = True
00057 
00058 # If this is true, the weighted pt-eta distributions for each decay mode will be the same.
00059 #  otherwise, the pt-eta distribution of the entire training sample (post DM preselection)
00060 #  will be used.
00061 WeightByIndividualDecayMode = False
00062 
00063 # If true, single isolated charged pions and three prongs
00064 #  with charge  +-3 will not be included in the training sample
00065 ExcludePrepassAndPrefail = False
00066 
00067 # If greater than zero, require that either the lead track or lead pion have pt greater 
00068 #  than the value supplied
00069 RequireLeadPionPt = 5.0
00070 LeadPionRequirementString = "( (MainTrackPt > %f && MainTrackAngle < 0.1) || (Alt$(TrackPt[0],0) > %f && Alt$(TrackAngle[0], 20) < 0.1) || (Alt$(PiZeroPt[0], 0) > %f && Alt$(PiZeroAngle[0], 20) < 0.1) )" % (RequireLeadPionPt,RequireLeadPionPt,RequireLeadPionPt)
00071 
00072 """
00073 Example of multiple algorithms
00074 myTauAlgorithms = ["pfTauDecayModeHighEfficiency",
00075                    "pfTauDecayModeInsideOut"]
00076 """
00077 
00078 # define locations of signal/background root files
00079 TauTagToolsWorkingDirectory = os.path.join(Project_Area, "src/RecoTauTag/TauTagTools")
00080 SignalRootDir               = os.path.join(TauTagToolsWorkingDirectory, "test", "ztt")
00081 BackgroundRootDir           = os.path.join(TauTagToolsWorkingDirectory, "test", "qcd")
00082 
00083 #Globs to get files for training and evaluation.  If you want to ensure different sets, you can do something like
00084 # add a requirement such as *[0123].root for training and *[4].root for testing.  (files not ending in four used for training, ending in four used for testing)
00085 SignalFileTrainingGlob     = "%s/*[012356789].root" % SignalRootDir
00086 BackgroundFileTrainingGlob = "%s/*[012356789].root" % BackgroundRootDir
00087 #SignalFileTrainingGlob     = "%s/*[0].root" % SignalRootDir
00088 #BackgroundFileTrainingGlob = "%s/*[0].root" % BackgroundRootDir
00089 
00090 SignalFileTestingGlob     = "%s/*4.root" % SignalRootDir
00091 BackgroundFileTestingGlob = "%s/*4.root" % BackgroundRootDir
00092 
00093 #################################################################
00094 #####  DO NOT MODIFY BELOW THIS LINE (experts only) #############
00095 #################################################################
00096 
def GetTrainingFile(computerName, anAlgo):
   """Build the path of the .mva file for one computer-name/algorithm pair.

   The result has the form
      <TauTagToolsWorkingDirectory>/test/TrainDir_<computerName>_<anAlgo>/<computerName>.mva

   computerName -- string used both in the directory name and as the file stem
   anAlgo       -- string appended to the directory name
   """
   trainDirName = "TrainDir_%s_%s" % (computerName, anAlgo)
   mvaFileName  = "%s.mva" % computerName
   return os.path.join(TauTagToolsWorkingDirectory, "test", trainDirName, mvaFileName)
00099 
# Find the unique mva types to train.  The same MVA may appear in more than one
# collection, so key on the computer name and keep the first one seen.
listOfMVANames = {}
# Only the collection contents are needed here; .values() also works on both
# Python 2 and Python 3, unlike .iteritems().
for mvaCollection in MVACollections.values():
   for mva in mvaCollection:
      name = mva.computerName.value()
      if name not in listOfMVANames:
         listOfMVANames[name] = mva

# One entry per unique MVA
myModules = list(listOfMVANames.values())

# Expand the user-supplied globs into concrete file lists
SignalTrainFiles         = glob.glob(SignalFileTrainingGlob)
BackgroundTrainFiles     = glob.glob(BackgroundFileTrainingGlob)

SignalTestingFiles         = glob.glob(SignalFileTestingGlob)
BackgroundTestingFiles     = glob.glob(BackgroundFileTestingGlob)
00117 
# Catch dumb errors before we begin
def EverythingInItsRightPlace():
   """Sanity-check the training setup; raise IOError on the first problem found.

   Checks, in order:
     1. the signal and background root file directories exist,
     2. each of the four training/testing file lists is non-empty,
     3. an xml configuration file exists for every module in myModules.

   Returns None when every check passes.
   """
   # Check the directories first.  The file lists are globbed from inside these
   # directories, so a missing directory also leaves the lists empty; the
   # directory message is the more specific diagnosis and must not be masked
   # by the generic "file list is empty" error.
   if not os.path.exists(SignalRootDir):
      raise IOError("Signal root file directory (%s) does not exist! Have you created the MVA raw training data?" % SignalRootDir)
   if not os.path.exists(BackgroundRootDir):
      raise IOError("Background root file directory (%s) does not exist! Have you created the MVA raw training data?" % BackgroundRootDir)

   # Every one of the four lists must contain at least one file
   fileLists = (SignalTrainFiles, BackgroundTrainFiles, SignalTestingFiles, BackgroundTestingFiles)
   for fileList in fileLists:
      if not fileList:
         raise IOError("The signal/background root file training/testing file list is empty! Check the SignalFileTrainingGlob etc. in MVASteering.py")

   # Ensure that we have all the necessary XML files
   for aModule in myModules:
      computerName = aModule.computerName.value() # convert to python string
      xmlFileLoc   = os.path.join(TauTagToolsWorkingDirectory, "xml", "%s.xml" % computerName)
      if not os.path.exists(xmlFileLoc):
         raise IOError("Can't find xml configuration file for %s - please check that %s exists!" % (computerName, xmlFileLoc))