00001
00002 import re, os
00003 import parsingRulesHelper
00004
def f_candle_and_step_inJobID(candle, step, x):
    """Check only the first two parts of a JobID tuple: candle and step.

    Parameters:
        candle -- candle name expected at x[0]
        step   -- step name expected at x[1]
        x      -- JobID tuple (or any indexable) whose first two items are
                  the candle and the step

    Returns True when both the candle and the step match, False otherwise.
    """
    return x[0] == candle and x[1] == step
00007
00008
00009 """
00010 Includes general functions to work with fileNames and related operations:
00011 * getting candle, step etc - JobID from fileName and vice-versa
00012 - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
00013 * root file size from candle,step
00014 * reads simulation candles to get release version
00015
00016 """
00017
# Cache of compiled candle/step regexps, keyed by the suffix pattern they
# were built for (filled lazily by getJobID_fromFileName).
universal_candle_step_regs = {}
# Matches file names ending in ".root" (case-insensitive). The dot is escaped
# so that names such as "fooroot" are not mistaken for ROOT files.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
00020
00021
00022 """
00023 We have Simulation candles lines in format like:
00024
00025 cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
00026
00027 """
# Parsing rules applied to each cmsDriver.py line of a SimulationCandles file.
# Every rule is a tuple (field names, regexp[, "req"]).
# NOTE(review): presumably each field name labels the regexp group at the same
# position, an empty name means "ignore this group", and "req" marks a field
# whose absence is reported as missing -- confirm against parsingRulesHelper.
simCandlesRules = (

    # the whole option string following the command name
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),

    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),

    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

)
# Materialize the processed rules as a list: on Python 3 a bare map() is a
# one-shot iterator and would be exhausted after the first parsed line, while
# the rules are reused for every line of the file.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
00044
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Look up the cmsDriver.py settings of one step in a SimulationCandles file.

    The first regular file in `path` whose name starts with
    "SimulationCandles_" is read and every line is parsed with the
    module-level `simCandlesRules`.  The first line that has no missing
    required fields, whose step equals `step` (surrounding whitespace
    ignored) and whose pile-up setting agrees with `is_pileup` is returned.

    Parameters:
        path      -- directory that should contain the SimulationCandles_* file
        step      -- step name to look for (e.g. "DIGI")
        is_pileup -- truthy to select a line that has a pile-up type,
                     falsy to select a line that has none

    Returns:
        The dictionary of parsed fields for the matching line, with its
        "conditions" entry reduced to the bare tag, or None when there is
        no SimulationCandles_* file or no matching line.

    Raises:
        OSError when `path` cannot be listed; errors raised while opening
        or reading the file are propagated as well.
    """
    # get the actual file: first SimulationCandles_* regular file, in listing order
    simulation_candles_file = None
    for name in os.listdir(path):
        full_name = os.path.join(path, name)
        if name.startswith("SimulationCandles_") and os.path.isfile(full_name):
            simulation_candles_file = full_name
            break
    if simulation_candles_file is None:
        return None

    # read it; the context manager closes the file even if reading fails
    with open(simulation_candles_file, 'r') as candles:
        lines = [line.strip() for line in candles]

    wanted_step = step.strip()
    wants_pileup = bool(is_pileup)

    # we call a shared helper to parse the file, one line at a time
    for line in lines:
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)

        # skip lines that are not complete cmsDriver.py lines
        if missing_fields:
            continue
        if info["step"].strip() != wanted_step:
            continue
        # the line must have a pile-up type exactly when pile-up was requested
        if bool(info["pileup_type"]) != wants_pileup:
            continue

        # Normalise the conditions only for the line that is returned, so a
        # malformed or unknown value on an unrelated line cannot abort the lookup.
        conditions = info['conditions']
        if 'auto:' in conditions:
            # imported lazily: only needed for "auto:<key>" conditions
            from Configuration.PyReleaseValidation.autoCond import autoCond
            info['conditions'] = autoCond[ conditions.split(':')[1] ].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in conditions:
            # "FrontierConditions_GlobalTag,<tag>" -> "<tag>"
            info['conditions'] = conditions.split(",")[1]
        return info

    return None
00091
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    r"""
    Returns the JobID (candle, step, pileup_type, conditions, event_content) out of filename
    -- if no pile up returns empty string for pileup type
    -- conditions and event_content are empty strings unless a matching
       SimulationCandles_* file is found next to the file (or in givenPath)
    -- returns a tuple of five None values if the file name does not match

    Parameters:
        logfile_name -- file name, optionally with a leading directory
        suffix       -- regular expression fragment the file name must end with
        givenPath    -- directory to search for the SimulationCandles_* file
                        instead of the directory part of logfile_name

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # split off the directory; it is only used to look for SimulationCandles
    (path, filename) = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    # create and cache one compiled regexp per suffix
    candle_step_re = universal_candle_step_regs.get(suffix)
    if candle_step_re is None:
        candle_step_re = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix , re.VERBOSE)
        universal_candle_step_regs[suffix] = candle_step_re

    result = candle_step_re.search(filename)
    if not result:
        return (None, None, None, None, None)

    candle, step, pileup_marker = result.groups()
    is_pileup = "PILEUP" if pileup_marker else ""

    # if we had the candle without underscore inside (like TTBAR but not
    # E_1000) the match ends with an underscore which needs to be removed
    if candle.endswith('_'):
        candle = candle[:-1]

    # try to fetch the conditions and real pileup type if the
    # SimulationCandles file exists; this lookup is best-effort
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
    except (OSError, IOError):
        # directory missing/unlistable or file unreadable (Python 2 raises
        # IOError from open()): keep what the file name alone told us
        conf = None
    if conf:
        is_pileup = conf["pileup_type"]
        conditions = conf["conditions"]
        event_content = conf["event_content"]

    return (candle, step, is_pileup, conditions, event_content)
00164
00165
def getJobID_fromRootFileName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of a root file name, by delegating to getJobID_fromFileName with
    the ".root" suffix.  In the examples below no SimulationCandles_* file
    can be found, so conditions and event_content stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # the suffix is a regexp fragment, hence the escaped dot
    return getJobID_fromFileName(logfile_name, "\\.root")
00183
def getJobID_fromEdmSizeFileName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of an EdmSize file name, by delegating to getJobID_fromFileName
    with the "_EdmSize" suffix.  In the examples below no
    SimulationCandles_* file can be found, so conditions and event_content
    stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
    ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_EdmSize")
00201
def getJobID_fromTimeReportLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of a timing report log name, by delegating to
    getJobID_fromFileName with the "_TimingReport.log" suffix.  The
    examples below assume no SimulationCandles_* file is found in the
    file's directory, so conditions and event_content stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # the suffix is a regexp fragment: escape the dot so that only a literal
    # ".log" ending is accepted
    return getJobID_fromFileName(logfile_name, r"_TimingReport\.log")
00219
00220
def getRootFileSize(path, candle, step):
    """Get the root file size for the candle, step in the directory `path`.

    Parameters:
        path   -- directory to search for root files
        candle -- candle name the root file must belong to
        step   -- step name the root file must belong to

    Returns:
        Size in bytes of the first root file in `path` whose JobID has the
        given candle and step; 0 when there is no such file or when
        inspecting the files fails (the problem is printed).

    Raises:
        OSError when `path` itself cannot be listed.
    """
    root_files = [os.path.join(path, name) for name in os.listdir(path)
                  if test_root_file.search(name)
                  and os.path.isfile(os.path.join(path, name))]

    # get the size of the file if it is the root file for current candle and
    # step; this is best-effort, so any failure is reported and yields 0
    try:
        for root_file in root_files:
            job_id = getJobID_fromRootFileName(root_file)
            if f_candle_and_step_inJobID(candle, step, job_id):
                return os.stat(root_file).st_size
    except Exception as err:
        print(err)
        return 0

    print("no root file found for candle %s, step %s in %s" % (candle, step, path))
    return 0
00236
def read_SimulationCandles(path):
    """Read the release version from the SimulationCandles file in `path`.

    The first regular file in `path` whose name starts with
    "SimulationCandles_" is scanned for the first line starting with
    "#Version"; that line has the format "#Version : CMSSW_3_2_0".

    Parameters:
        path -- directory that contains the SimulationCandles_* file

    Returns:
        The text between the first and the second colon of the version
        line, stripped of surrounding whitespace (e.g. "CMSSW_3_2_0").

    Raises:
        IndexError when there is no SimulationCandles_* file, no "#Version"
        line, or the version line has no colon.
        OSError when `path` cannot be listed.
    """
    # get the actual file
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        raise IndexError("no SimulationCandles_* file found in %s" % (path,))
    simulation_candles_file = candidates[0]

    # read and parse it; the context manager closes the file on any error
    with open(simulation_candles_file, 'r') as candles:
        for line in candles:
            if line.startswith("#Version"):
                # a version line without a colon raises IndexError here
                return [part.strip() for part in line.split(":")][1]

    raise IndexError("no #Version line found in %s" % (simulation_candles_file,))
00253
00254
if __name__ == "__main__":
    # run the doctests embedded in the docstrings of this module
    import doctest
    doctest.testmod()
    # ad-hoc manual check against a developer's local copy of the test data
    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    # a single parenthesised argument prints identically on Python 2 and 3
    print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))
00260
00261
00262
00263
00264