CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_2_9/src/Validation/Performance/python/FileNamesHelper.py

Go to the documentation of this file.
00001 #!/usr/bin/env python2.4
00002 import re, os
00003 import parsingRulesHelper
00004 
def f_candle_and_step_inJobID(candle, step, x):
        """Tell whether the JobID tuple ``x`` starts with the given candle and step.

        Only the first two elements of ``x`` (candle and step) are compared;
        whatever follows them in the tuple is ignored.
        """
        return x[0] == candle and x[1] == step
00007 
00008 
00009 """
00010 Includes general functions to work with fileNames and related operations:
00011 * getting candle, step etc - JobID from fileName and vice-versa
00012   - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
00013 * root file size from candle,step
00014 * reads simulation candles to get release version
00015 
00016 """
00017 
# Cache of the compiled file-name regexps built by getJobID_fromFileName,
# keyed by the suffix pattern each of them ends with.
universal_candle_step_regs = {}
# Recognises names ending in a literal ".root" (any letter case).  The dot is
# escaped on purpose: unescaped it would match any character, so a name that
# merely ends in "root" (e.g. "fooroot") would be taken for a ROOT file.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
00020 
00021 
# The lines of interest in the SimulationCandles file look like this:
#
# cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG  --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
#
# Each rule is a tuple (field names, regular expression[, "req"]) handed to
# parsingRulesHelper.
# NOTE(review): presumably there is one field name per regexp group, an empty
# name marks a group whose text is discarded, and "req" marks a field that
# must be found -- confirm against parsingRulesHelper.rulesParser.
simCandlesRules = (
        # the complete option string following "cmsDriver.py"
        (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),
        # e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        # The pattern accepts any non-blank value so that the newer cmsDriver.py
        # --conditions form (which may drop the "FrontierConditions_GlobalTag,"
        # prefix) is matched as well.
        (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
        (("",  "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
        (("",  "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
        # not sure whether the event content really has to be required
        (("",  "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        (("",  "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

        # TODO: after changing the splitter to "taskset -c ..." this line is no longer part of the job text
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)
# Compile the regexps once at import time; the rules are later passed to
# rulesParser with compileRules = False.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
00044         
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
        """Find the cmsDriver.py configuration of one job in the SimulationCandles file.

        The first regular file in ``path`` whose name starts with
        "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) is read and
        every line is parsed separately with ``simCandlesRules``.  The first line
        that parses without missing required fields, whose step equals ``step``
        (surrounding blanks ignored) and whose pile-up presence agrees with
        ``is_pileup`` is returned.

        Parameters:
          path      -- directory expected to contain the SimulationCandles_* file
          step      -- step string to look for, e.g. "DIGI" or "GEN,SIM"
          is_pileup -- any value; only its truthiness is used (true = pile-up job)

        Returns the dictionary produced by parsingRulesHelper.rulesParser for the
        matching line, with its "conditions" entry reduced to the bare global tag,
        or None when there is no SimulationCandles_* file or no line matches.

        Raises OSError if ``path`` cannot be listed.
        """
        # NOTE: the file also carries the release version (see read_SimulationCandles);
        # release base and "how to reproduce" information are still TODO.
        candidates = [os.path.join(path, f) for f in os.listdir(path)
                      if f.startswith("SimulationCandles_") and os.path.isfile(os.path.join(path, f))]
        if not candidates:
                return None

        f = open(candidates[0], 'r')
        try:
                lines = [s.strip() for s in f.readlines()]
        finally:
                # close the file even when reading fails
                f.close()

        wanted_step = step.strip()
        for line in lines:
                # each line is parsed on its own by the shared helper
                info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)

                # skip everything that is not a completely parsed cmsDriver.py line
                if missing_fields:
                        continue
                # we only have to match the step ...
                if info["step"].strip() != wanted_step:
                        continue
                # ... and whether the job has pile-up or not
                if bool(is_pileup) != bool(info["pileup_type"]):
                        continue

                # Only the line that is actually returned gets its conditions
                # normalised, so an unrelated or malformed line can no longer make
                # the whole lookup fail (or trigger the autoCond import).
                conditions = info['conditions']
                if 'auto:' in conditions:
                        # new cmsDriver.py style "--conditions auto:<key>": resolve the key
                        from Configuration.AlCa.autoCond import autoCond
                        conditions = autoCond[ conditions.split(':')[1] ].split("::")[0]
                elif 'FrontierConditions_GlobalTag' in conditions:
                        # old style "FrontierConditions_GlobalTag,<tag>": keep the part after the comma
                        conditions = conditions.split(",")[1]
                info['conditions'] = conditions
                return info

        # no line of the file describes the requested job
        return None
00088                                 
00089 
00090 
00091 
00092 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
        #TODO: join together with the one from parseTimingReport.py
        r"""
        Returns the JobID (candle, step, pileup_type, conditions, event_content) out of filename.

        Parameters:
          logfile_name -- file name, optionally with a directory part
          suffix       -- regular expression text the file name must end with;
                          it is inserted into the pattern as is, so literal
                          dots have to be escaped by the caller
          givenPath    -- if not empty, the directory searched for the
                          SimulationCandles_* file instead of the directory
                          part of logfile_name

        The name is expected to look like <candle>_<step>[_PILEUP]<suffix>,
        where '-' in the step part is turned into ','.  If a
        SimulationCandles_* file describing the job is found, pileup_type,
        conditions and event_content are taken from it; otherwise pileup_type
        is "PILEUP" or "" and the last two elements are empty strings.
        A file name that does not match yields (None, None, None, None, None).

        * the candle might include one optional underscore:
        >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
        ('PI-_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
        ('MINBIAS', 'GEN,FASTSIM', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # only the bare file name is parsed; the directory is used for the
        # SimulationCandles lookup further down
        (path, filename) = os.path.split(logfile_name)
        if givenPath:
                path = givenPath

        if suffix not in universal_candle_step_regs:
                #create and cache a regexp
                universal_candle_step_regs[suffix] = re.compile( \
                        r"""
                        #candle1_[opt:candle2]_
                        ^([^_]+_[^_]*)_

                        # step
                        ([^_]+)(_PILEUP)?%s$
                """ % suffix , re.VERBOSE)

        result = universal_candle_step_regs[suffix].search(filename)
        if not result:
                return (None, None, None, None, None)

        (candle, step, pileup_marker) = result.groups()
        step = step.replace('-', ',')
        if pileup_marker:
                is_pileup = "PILEUP"
        else:
                is_pileup = ""

        # A candle without an inner underscore (TTBAR, unlike E_1000) is captured
        # together with the first of the two separating underscores: drop it.
        if candle.endswith('_'):
                candle = candle[:-1]

        # try to fetch the conditions and the real pileup type, if the
        # SimulationCandles file exists
        conditions = ''
        event_content = ''
        try:
                conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup= is_pileup)
        except OSError:
                # best effort: the directory could not be listed, keep the
                # values derived from the file name alone
                conf = None
        if conf:
                is_pileup = conf["pileup_type"]
                conditions = conf["conditions"]
                event_content = conf["event_content"]

        return (candle, step, is_pileup, conditions, event_content)
00165 
00166 
def getJobID_fromRootFileName(logfile_name):
        """
        Returns the JobID (candle, step, pileup_type, conditions, event_content)
        encoded in the name of a ".root" file.

        The work is done by getJobID_fromFileName; the last two elements stay
        empty when no SimulationCandles_* file can be consulted.

        * the candle might include one optional underscore:
        >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
        ('PI-_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
        ('MINBIAS', 'GEN,FASTSIM', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # the suffix is regexp text, hence the escaped dot
        root_suffix = "\\.root"
        return getJobID_fromFileName(logfile_name, root_suffix)
00184 
def getJobID_fromEdmSizeFileName(logfile_name):
        """
        Returns the JobID (candle, step, pileup_type, conditions, event_content)
        encoded in the name of an "_EdmSize" file.

        The work is done by getJobID_fromFileName; the last two elements stay
        empty when no SimulationCandles_* file can be consulted.

        * the candle might include one optional underscore:
        >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
        ('E_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
        ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
        ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
        """
        edm_size_suffix = "_EdmSize"
        return getJobID_fromFileName(logfile_name, edm_size_suffix)
00202 
def getJobID_fromTimeReportLogName(logfile_name):
        """
        Returns the JobID (candle, step, pileup_type, conditions, event_content)
        encoded in the name of a "_TimingReport.log" file.

        The work is done by getJobID_fromFileName; the last two elements stay
        empty when no SimulationCandles_* file can be consulted.

        * the candle might include one optional underscore:
        >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
        ('E_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
        ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # The suffix is inserted into a regular expression, so the dot must be
        # escaped; unescaped it matched any character in place of the ".".
        return getJobID_fromFileName(logfile_name, "_TimingReport\\.log")
00220 
def getJobID_fromMemcheckLogName(logfile_name):
        """
        Returns the JobID (candle, step, pileup_type, conditions, event_content)
        encoded in the name of a "_memcheck_vlgd.xml" file.

        The work is done by getJobID_fromFileName; the last two elements stay
        empty when no SimulationCandles_* file can be consulted.

        * after candle we have two underscores:
        >>> getJobID_fromMemcheckLogName("test_data/TTBAR__RAW2DIGI,RECO_memcheck_vlgd.xml")
        ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromMemcheckLogName("TTBAR__DIGI_PILEUP_memcheck_vlgd.xml")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # The suffix is inserted into a regular expression, so the dot must be
        # escaped; unescaped it matched any character in place of the ".".
        return getJobID_fromFileName(logfile_name, "_memcheck_vlgd\\.xml")
00234 
def getJobID_fromIgProfLogName(logfile_name):
        """
        Returns the JobID (candle, step, pileup_type, conditions, event_content)
        for an IgProf .sql3 file name.

        The name carries its fields separated by three underscores, like:
        TTbar___GEN,FASTSIM___LowLumiPileUp___MC_37Y_V5___RAWSIM___MEM_LIVE___1.sql3
        Only the first three fields (candle, step, pile-up) are taken from the
        name; the candle is upper-cased.  The pile-up type, conditions and event
        content that are returned come from the SimulationCandles_* file located
        in the same directory as the log file.

        Returns (None, None, None, None, None) when the name has fewer than
        three fields, when the directory cannot be listed, or when the
        SimulationCandles file has no entry for the job.
        """
        none_job_id = (None, None, None, None, None)

        (path, filename) = os.path.split(logfile_name)

        params = filename.split("___")
        if len(params) < 3:
                # not an IgProf style name: answer like getJobID_fromFileName does
                # for a non-matching name instead of failing with IndexError
                return none_job_id

        candle = params[0].upper()
        step = params[1]
        pileup_type = params[2]
        # translate the pile-up field into the marker used for the lookup
        if pileup_type == "NOPILEUP":
                pileup_type = ""
        elif pileup_type == "LowLumiPileUp":
                pileup_type = "PILEUP"
        #conditions = params[3] + "::All"
        #event_content = params[4]

        #get the conditions from the SimulationCandles!!
        try:
                conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup= pileup_type)
        except OSError:
                # directory missing or unreadable: treat as "no configuration found"
                conf = None
        if not conf:
                return none_job_id

        return (candle, step, conf["pileup_type"], conf["conditions"], conf["event_content"])
00268 
def getRootFileSize(path, candle, step):
        """Get the size in bytes of the root file for the candle, step found in ``path``.

        Every regular file in ``path`` whose name ends in ".root" is turned into
        a JobID with getJobID_fromRootFileName; the size of the first file whose
        candle and step match is returned.

        Returns 0 (after printing the reason) when no file matches or when
        examining the files fails.  Raises OSError if ``path`` cannot be listed.
        """
        import sys

        files = os.listdir(path)
        root_files = [os.path.join(path, f) for f in files
                                 if test_root_file.search(f)
                                        and os.path.isfile(os.path.join(path, f)) ]

        try:
                for root_file in root_files:
                        # stop at the first match: the remaining files need not be
                        # parsed or stat'ed any more
                        if f_candle_and_step_inJobID(candle, step, getJobID_fromRootFileName(root_file)):
                                return os.stat(root_file).st_size
        except Exception:
                # deliberately best effort: report the problem and carry on with 0
                # (sys.exc_info keeps this valid on old and new Python versions)
                print(sys.exc_info()[1])
                return 0

        print("no root file found for candle %s, step %s in %s" % (candle, step, path))
        return 0
00284 
def read_SimulationCandles(path):
        """Return the release version recorded in the SimulationCandles file of ``path``.

        The first regular file in ``path`` whose name starts with
        "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) is read and
        the value of its first line of the form
            #Version     : CMSSW_3_2_0
        is returned, i.e. the text between the first and the second colon with
        surrounding blanks removed.

        Raises IndexError when there is no such file or no such line, and
        OSError when ``path`` cannot be listed.
        """
        # TODO: release base [path] and "how to reproduce" information

        # collect the candidate files, the first one is used
        candle_files = []
        for entry in os.listdir(path):
                full_name = os.path.join(path, entry)
                if os.path.isfile(full_name) and entry.startswith("SimulationCandles_"):
                        candle_files.append(full_name)
        candles_file_name = candle_files[0]

        candles_file = open(candles_file_name, 'r')
        try:
                content = candles_file.readlines()
        finally:
                candles_file.close()

        # pick the first "#Version" line and split it at the colons
        version_lines = [text for text in content if text.startswith("#Version")]
        fields = [part.strip() for part in version_lines[0].split(":")]
        return fields[1]
00301 
00302 
if __name__ == "__main__":
        # run the examples embedded in the docstrings of this module
        import doctest
        doctest.testmod()

        # Manual check against a sample directory on the original developer's
        # machine; elsewhere the directory does not exist and only the fields
        # derived from the file name are filled in.
        path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
        # a single parenthesised argument prints identically as a statement or as a function call
        print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))

        #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
00310 
00311 
00312