00001
00002 import re, os
00003 import parsingRulesHelper
00004
def f_candle_and_step_inJobID(candle, step, x):
    """Tell whether a JobID tuple belongs to the given candle and step.

    Only the first two items of ``x`` (candle and step) are inspected; any
    further items of the JobID are ignored.

    Args:
        candle: expected candle, compared with ``x[0]``.
        step: expected step, compared with ``x[1]``.
        x: JobID sequence whose first two items are candle and step.

    Returns:
        True when both the candle and the step match, False otherwise.
    """
    return x[0] == candle and x[1] == step
00007
00008
"""
General helper functions for working with file names and related operations:
  * getting the JobID (candle, step, etc.) from a file name and vice versa
      - the JobID includes conditions, pileup_type and event_content, which are
        read from the SimulationCandles file
        [TODO: kept in this module for simplicity, might be moved later]
  * getting the root file size for a given candle and step
  * reading the SimulationCandles file to get the release version
"""
00017
# Cache of compiled candle/step file-name patterns, keyed by the suffix
# regular expression they were built with (filled in by getJobID_fromFileName).
universal_candle_step_regs = {}
# Matches names ending with the ".root" extension, ignoring case.  The dot is
# escaped so that a name which merely ends in "root" (e.g. "xroot") is not
# mistaken for a ROOT file.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
00020
00021
# The job lines of a SimulationCandles file have a format like:
#
#   cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
#
# Each rule is (names for the regex groups, regex[, "req"]).
# NOTE(review): presumably an empty name means "ignore this group" and "req"
# marks a required field -- confirm against parsingRulesHelper.
_simCandlesRawRules = (
    # the complete option string following the cmsDriver.py command
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),

    # individual options picked out of that string
    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),
)
# Compile the rules once at import time; they are applied to every line of the
# SimulationCandles file.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, _simCandlesRawRules))
00044
def _normalize_conditions(conditions):
    """Reduce the value of a ``--conditions`` option to the stored conditions string."""
    if 'auto:' in conditions:
        # "auto:<key>" is resolved through the autoCond table; imported lazily
        # so that the module is only needed when such a value is really met.
        from Configuration.AlCa.autoCond import autoCond
        return autoCond[conditions.split(':')[1]].split("::")[0]
    if 'FrontierConditions_GlobalTag' in conditions:
        # "FrontierConditions_GlobalTag,<tag>" -> "<tag>"
        return conditions.split(",")[1]
    return conditions


def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Look up the cmsDriver configuration of one step in the SimulationCandles file.

    The first regular file found in ``path`` whose name starts with
    ``SimulationCandles_`` is read, and each of its lines is parsed on its own
    with ``simCandlesRules``.  The first line for which no required field is
    missing, whose step equals ``step`` and whose pileup setting agrees with
    ``is_pileup`` is used.

    Args:
        path: directory that is searched for the SimulationCandles file.
        step: step to look for (both sides are stripped before comparing).
        is_pileup: truthy to select a line that has a pileup type, falsy to
            select a line that has none.

    Returns:
        The parsed fields of the matching line, with the ``conditions`` entry
        normalised, or None when there is no SimulationCandles file or no
        line matches.

    Raises:
        OSError: if ``path`` cannot be listed.
    """
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        return None

    # The context manager closes the file even if reading fails.
    with open(candidates[0], 'r') as candles_file:
        lines = [raw_line.strip() for raw_line in candles_file]

    for line in lines:
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)

        # Lines lacking a required field (comments, headers, ...) are skipped
        # before any of their fields is touched.
        if missing_fields:
            continue
        if info["step"].strip() != step.strip():
            continue
        # The line must have a pileup type exactly when one was asked for.
        if bool(is_pileup) != bool(info["pileup_type"]):
            continue

        # Only the line that is actually returned gets its conditions resolved.
        info['conditions'] = _normalize_conditions(info['conditions'])
        return info

    return None
00088
00089
00090
00091
00092
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    r"""
    Returns the JobID (candle, step, pileup_type, conditions, event_content) out of filename
    -- if no pile up returns empty string for pileup type

    Args:
        logfile_name: file name, optionally with a leading directory.
        suffix: regular expression for the end of the file name; it is
            inserted verbatim into the (verbose) candle/step pattern.
        givenPath: directory to search for the SimulationCandles file instead
            of the directory part of ``logfile_name``.

    Returns:
        A 5-tuple (candle, step, pileup_type, conditions, event_content).
        If a matching entry is found in the SimulationCandles file,
        pileup_type, conditions and event_content are taken from it;
        otherwise pileup_type is 'PILEUP' or '' and the last two items are ''.
        If the file name does not match at all, all five items are None.

    The examples below assume that no SimulationCandles file can be read for
    the directory of the file (NOTE(review): for a bare file name the
    directory is '' and listing it is expected to raise OSError -- confirm).

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    (path, filename) = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    # Compile the pattern for this suffix only once and keep it in the cache.
    if suffix not in universal_candle_step_regs:
        universal_candle_step_regs[suffix] = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix, re.VERBOSE)

    result = universal_candle_step_regs[suffix].search(filename)
    if not result:
        return (None, None, None, None, None)

    candle, step, pileup_marker = result.groups()
    # In file names the parts of a step may be joined with '-' instead of ','.
    step = step.replace('-', ',')
    is_pileup = "PILEUP" if pileup_marker else ""

    # A candle without an inner underscore (TTBAR, unlike E_1000) is captured
    # with the first of the two separating underscores attached; drop it.
    if candle.endswith('_'):
        candle = candle[:-1]

    # Try to fetch the conditions and the real pileup type, if the
    # SimulationCandles file exists.
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
    except OSError:
        # Best effort: the directory could not be read, so keep what the file
        # name alone provides.
        conf = None
    if conf:
        is_pileup = conf["pileup_type"]
        conditions = conf["conditions"]
        event_content = conf["event_content"]

    return (candle, step, is_pileup, conditions, event_content)
00165
00166
def getJobID_fromRootFileName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of a .root file.

    Thin wrapper around getJobID_fromFileName using the suffix pattern
    "\\\\.root"; see that function for the meaning of the returned items.
    The examples assume that no SimulationCandles file can be read for the
    directory of the file, so conditions and event_content stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "\\.root")
00184
def getJobID_fromEdmSizeFileName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of an EdmSize file.

    Thin wrapper around getJobID_fromFileName using the suffix "_EdmSize";
    see that function for the meaning of the returned items.  The examples
    assume that no SimulationCandles file can be read for the directory of
    the file, so conditions and event_content stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
    ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_EdmSize")
00202
def getJobID_fromTimeReportLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of a TimingReport log file.

    Thin wrapper around getJobID_fromFileName; see that function for the
    meaning of the returned items.  The suffix is a regular expression, so
    the dot of ".log" is escaped to match a literal dot only.  The examples
    assume that no SimulationCandles file can be read for the directory of
    the file, so conditions and event_content stay empty.

    * the candle might include one optional underscore:
    >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_TimingReport\\.log")
00220
def getJobID_fromMemcheckLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of a memcheck (valgrind) XML file.

    Thin wrapper around getJobID_fromFileName; see that function for the
    meaning of the returned items.  The suffix is a regular expression, so
    the dot of ".xml" is escaped to match a literal dot only.  The examples
    assume that no SimulationCandles file can be read for the directory of
    the file, so conditions and event_content stay empty.

    * after the candle we have two underscores:
    >>> getJobID_fromMemcheckLogName("test_data/TTBAR__RAW2DIGI,RECO_memcheck_vlgd.xml")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromMemcheckLogName("TTBAR__DIGI_PILEUP_memcheck_vlgd.xml")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_memcheck_vlgd\\.xml")
00234
def getJobID_fromIgProfLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of an IgProf .sql3 file.

    Everything is given in the file name, separated by "___", like:
    TTbar___GEN,FASTSIM___LowLumiPileUp___MC_37Y_V5___RAWSIM___MEM_LIVE___1.sql3
    Only the first three fields (candle, step, pileup) are taken from the
    name; pileup type, conditions and event content of the result come from
    the matching entry of the SimulationCandles file in the same directory.

    Returns:
        The JobID 5-tuple, or a tuple of five None values when the file name
        has fewer than three "___"-separated fields or no matching entry is
        found in the SimulationCandles file.

    Raises:
        OSError: if the directory of ``logfile_name`` cannot be listed.
    """
    (path, filename) = os.path.split(logfile_name)

    params = filename.split("___")
    if len(params) < 3:
        # Not an IgProf file name: report "unknown" instead of failing with
        # an IndexError.
        return (None, None, None, None, None)

    candle = params[0].upper()
    step = params[1]
    pileup_type = params[2]
    # Translate the markers used in the file name into the pileup flag used
    # for the lookup; any other value is passed on unchanged.
    if pileup_type == "NOPILEUP":
        pileup_type = ""
    elif pileup_type == "LowLumiPileUp":
        pileup_type = "PILEUP"

    conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = pileup_type)
    if not conf:
        return (None, None, None, None, None)

    return (candle, step, conf["pileup_type"], conf["conditions"], conf["event_content"])
00268
def getRootFileSize(path, candle, step):
    """Get the size of the root file for the given candle and step in ``path``.

    Args:
        path: directory to search.
        candle: candle the root file must belong to.
        step: step the root file must belong to.

    Returns:
        The size in bytes (``st_size``) of the first regular file in ``path``
        that matches ``test_root_file`` and whose JobID has the given candle
        and step, or 0 when there is no such file or inspecting the files
        fails.

    Raises:
        OSError: if ``path`` cannot be listed.
    """
    root_files = [os.path.join(path, f) for f in os.listdir(path)
                  if test_root_file.search(f)
                  and os.path.isfile(os.path.join(path, f))]

    try:
        # Stop at the first root file of the requested candle and step.
        for root_file in root_files:
            if f_candle_and_step_inJobID(candle, step, getJobID_fromRootFileName(root_file)):
                return os.stat(root_file).st_size
    except Exception as e:
        # Deliberately best effort: the size is optional information, so any
        # failure is reported and treated as "size unknown".
        print(e)
    return 0
00284
def read_SimulationCandles(path):
    """Return the release version recorded in the SimulationCandles file.

    The first regular file found in ``path`` whose name starts with
    ``SimulationCandles_`` is scanned for the first line starting with
    ``#Version``; such a line has the format ``#Version : CMSSW_3_2_0``.

    Args:
        path: directory containing the SimulationCandles file.

    Returns:
        The text after the first ':' of the version line (up to the next
        ':', if any), stripped of surrounding whitespace.

    Raises:
        IndexError: if there is no SimulationCandles file, no ``#Version``
            line, or the version line contains no ':'.
        OSError: if ``path`` cannot be listed.
    """
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        raise IndexError("no SimulationCandles_* file found in %r" % (path,))

    # The context manager closes the file even if parsing fails; reading
    # stops at the first version line.
    with open(candidates[0], 'r') as candles_file:
        for line in candles_file:
            if line.startswith("#Version"):
                return [part.strip() for part in line.split(":")][1]

    raise IndexError("no '#Version' line found in %r" % (candidates[0],))
00301
00302
if __name__ == "__main__":
    # Run the doctests embedded in the docstrings of this module.
    import doctest
    doctest.testmod()
    # Manual check against a developer-local directory; the path exists only
    # on the original author's machine.
    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))
00308
00309
00310
00311
00312