CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch9/src/Validation/Performance/python/FileNamesHelper.py

Go to the documentation of this file.
00001 #!/usr/bin/env python2.4
00002 import re, os
00003 import parsingRulesHelper
00004 
def f_candle_and_step_inJobID(candle, step, x):
        """Tell whether the JobID tuple ``x`` belongs to the given candle and step.

        Only the first two items of ``x`` (candle and step) are compared; any
        further JobID items are ignored.

        >>> f_candle_and_step_inJobID("TTBAR", "DIGI", ("TTBAR", "DIGI", "PILEUP", "", ""))
        True
        >>> f_candle_and_step_inJobID("TTBAR", "DIGI", ("TTBAR", "GEN,SIM", "", "", ""))
        False
        """
        return x[0] == candle and x[1] == step
00007 
00008 
00009 """
00010 Includes general functions to work with fileNames and related operations:
00011 * getting candle, step etc - JobID from fileName and vice-versa
00012   - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
00013 * root file size from candle,step
00014 * reads simulation candles to get release version
00015 
00016 """
00017 
# Cache of compiled file-name regexps, keyed by the suffix pattern each one
# was built for; getJobID_fromFileName fills it on first use of a suffix.
universal_candle_step_regs = {}
# Matches file names ending in ".root", ignoring case. The dot is escaped so
# that names which merely end in "root" (e.g. "notaroot") are not accepted.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
00020 
00021 
# Rules for parsing the cmsDriver.py lines of a SimulationCandles file.
# Such a line looks like:
#
# cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG  --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
#
# Each rule is (field names, regexp[, "req"]). The field names line up with the
# regexp groups, "" marking a group that is not stored.
# NOTE(review): "req" presumably marks a field that rulesParser reports in
# missing_fields when absent - confirm against parsingRulesHelper.
simCandlesRules =  (
        # everything that follows "cmsDriver.py" on the line
        (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),
        #Changing the following to allow for new cmsDriver.py --conditions option (that can optionally drop the FrontierConditions_GlobalTag,)
        #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
        (("",  "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
        (("",  "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
        #not sure if event content is required
        (("",  "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        (("",  "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

        #TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)
# Build a real list (not a lazy map object): the rules are re-used for every
# parsed line, and a one-shot iterator would be empty after the first pass.
simCandlesRules = [parsingRulesHelper.rulesRegexpCompileFunction(rule) for rule in simCandlesRules]
00044         
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
        """Look up the cmsDriver.py settings of one step in the SimulationCandles file.

        The first regular file in ``path`` whose name starts with
        "SimulationCandles_" is read and every line is parsed with the
        module-level ``simCandlesRules``. The first line that parses without
        missing fields, whose step equals ``step`` and whose pile-up presence
        agrees with ``is_pileup`` is used.

        Arguments:
        path      -- directory searched for the SimulationCandles_* file
        step      -- step name to match (surrounding whitespace is ignored)
        is_pileup -- truthy selects lines that carry a --pileup option,
                     falsy selects lines that do not

        Returns the info dictionary built by parsingRulesHelper.rulesParser for
        the matching line, or None when there is no SimulationCandles_* file or
        no line matches. In the returned dictionary "conditions" is rewritten:
        "auto:KEY" becomes autoCond[KEY] cut at "::", and
        "FrontierConditions_GlobalTag,TAG" becomes TAG.

        Errors raised by os.listdir or open are not handled here.
        """
        candidates = [os.path.join(path, f) for f in os.listdir(path)
                      if os.path.isfile(os.path.join(path, f)) and f.startswith("SimulationCandles_")]
        if not candidates:
                return None

        # the context manager closes the file even if reading fails
        with open(candidates[0], 'r') as candles_file:
                lines = [s.strip() for s in candles_file.readlines()]

        wanted_step = step.strip()
        for line in lines:
                info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)
                # skip lines that are not complete cmsDriver.py commands
                if missing_fields:
                        continue
                if info["step"].strip() != wanted_step:
                        continue
                # pile-up must be present on both sides or absent on both sides
                if bool(is_pileup) != bool(info["pileup_type"]):
                        continue

                # Massaging the conditions entry to allow for the new
                # cmsDriver.py --conditions option; done only for the line that
                # is actually returned so discarded lines cost nothing.
                conditions = info['conditions']
                if 'auto:' in conditions:
                        from Configuration.PyReleaseValidation.autoCond import autoCond
                        info['conditions'] = autoCond[ conditions.split(':')[1] ].split("::")[0]
                elif 'FrontierConditions_GlobalTag' in conditions:
                        info['conditions'] = conditions.split(",")[1]
                return info

        return None
00087                                 
00088 
00089 
00090 
00091 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
        #TODO: join together with the one from parseTimingReport.py
        r"""Return the JobID encoded in a file name.

        The JobID is the 5-tuple
        (candle, step, pileup_type, conditions, event_content).

        Arguments:
        logfile_name -- file name, optionally with a directory part
        suffix       -- regular-expression fragment the file name must end with
                        (it is inserted into a re.VERBOSE pattern, so escape
                        literal dots)
        givenPath    -- directory in which to look for the SimulationCandles_*
                        file; defaults to the directory part of logfile_name

        When the name does not match, a tuple of five None values is returned.
        pileup_type is "" without pile-up and "PILEUP" with it, unless a
        SimulationCandles_* file provides the real pile-up type. conditions and
        event_content stay "" when no such file can be read.

        The examples below assume no SimulationCandles_* file is found.

        * the candle might include one optional underscore:
        >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
        ('PI-_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
        ('MINBIAS', 'GEN,FASTSIM', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # only the bare file name is matched against the pattern
        (path, filename) = os.path.split(logfile_name)
        if givenPath:
                path = givenPath

        if suffix not in universal_candle_step_regs:
                #create and cache a regexp
                universal_candle_step_regs[suffix] = re.compile(
                        r"""
                        #candle1_[opt:candle2]_
                        ^([^_]+_[^_]*)_

                        # step
                        ([^_]+)(_PILEUP)?%s$
                """ % suffix , re.VERBOSE)

        result = universal_candle_step_regs[suffix].search(filename)
        if not result:
                return (None, None, None, None, None)

        candle, step, pileup_marker = result.groups()
        is_pileup = ""
        if pileup_marker:
                is_pileup = "PILEUP"

        # a candle without an inner underscore (TTBAR, unlike E_1000) is
        # captured with a trailing underscore which has to be removed
        if candle.endswith('_'):
                candle = candle[:-1]

        # try to fetch the conditions and the real pileup type if the
        # SimulationCandles file exists
        conditions = ''
        event_content = ''
        try:
                conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
        except OSError:
                # best effort: the directory may be missing or unreadable, in
                # which case only the information from the file name is used
                conf = None
        if conf:
                is_pileup = conf["pileup_type"]
                conditions = conf["conditions"]
                event_content = conf["event_content"]

        return (candle, step, is_pileup, conditions, event_content)
00164 
00165 
def getJobID_fromRootFileName(logfile_name):
        """Return the JobID encoded in the name of a "*.root" file.

        Thin wrapper around getJobID_fromFileName; the result is the 5-tuple
        (candle, step, pileup_type, conditions, event_content). The examples
        assume no SimulationCandles_* file is found, which leaves the last
        two items empty.

        * the candle might include one optional underscore:
        >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
        ('PI-_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
        ('MINBIAS', 'GEN,FASTSIM', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # the suffix is a regexp fragment, hence the escaped dot
        return getJobID_fromFileName(logfile_name, "\\.root")
00183 
def getJobID_fromEdmSizeFileName(logfile_name):
        """Return the JobID encoded in the name of a "*_EdmSize" file.

        Thin wrapper around getJobID_fromFileName; the result is the 5-tuple
        (candle, step, pileup_type, conditions, event_content). The examples
        assume no SimulationCandles_* file is found, which leaves the last
        two items empty.

        * the candle might include one optional underscore:
        >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
        ('E_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
        ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
        ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
        """
        return getJobID_fromFileName(logfile_name, "_EdmSize")
00201 
def getJobID_fromTimeReportLogName(logfile_name):
        """Return the JobID encoded in the name of a "*_TimingReport.log" file.

        Thin wrapper around getJobID_fromFileName; the result is the 5-tuple
        (candle, step, pileup_type, conditions, event_content). The examples
        assume no SimulationCandles_* file is found next to the log file,
        which leaves the last two items empty.

        * the candle might include one optional underscore:
        >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
        ('E_1000', 'GEN,SIM', '', '', '')

        * otherwise after candle we have two underscores:
        >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
        ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

        * and lastly we have the PILEUP possibility:
        >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
        ('TTBAR', 'DIGI', 'PILEUP', '', '')
        """
        # the suffix is a regexp fragment: escape the dot so that it only
        # matches a literal "." and not any character
        return getJobID_fromFileName(logfile_name, r"_TimingReport\.log")
00219 
00220 
def getRootFileSize(path, candle, step):
        """Get the size in bytes of the root file for the candle and step in ``path``.

        The directory is scanned for regular files whose name passes
        ``test_root_file``; the first one whose JobID (taken from its file
        name) has the requested candle and step determines the result.

        Returns 0 when no such file exists. Any error raised while inspecting
        the files is printed and also results in 0, so that a single bad file
        does not abort the caller. Errors from listing ``path`` itself are
        not handled.
        """
        names = os.listdir(path)
        try:
                for name in names:
                        full_name = os.path.join(path, name)
                        if not (test_root_file.search(name) and os.path.isfile(full_name)):
                                continue
                        job_id = getJobID_fromRootFileName(full_name)
                        if f_candle_and_step_inJobID(candle, step, job_id):
                                # only the first matching file is stat'ed
                                return os.stat(full_name).st_size
        except Exception as e:
                # deliberately broad: size lookup is best effort
                print(e)
        return 0
00236 
def read_SimulationCandles(path):
        """Return the release version recorded in the SimulationCandles file of ``path``.

        The first regular file in ``path`` whose name starts with
        "SimulationCandles_" is scanned for the first line starting with
        "#Version", e.g. "#Version     : CMSSW_3_2_0"; the text between the
        first and second colon of that line, stripped of whitespace, is
        returned.

        Raises IndexError when there is no SimulationCandles_* file, no
        "#Version" line, or no colon on that line. Errors from os.listdir or
        open are not handled.
        """
        candidates = [os.path.join(path, f) for f in os.listdir(path)
                      if os.path.isfile(os.path.join(path, f)) and f.startswith("SimulationCandles_")]
        if not candidates:
                raise IndexError("no SimulationCandles_* file found in %s" % (path,))

        # the context manager closes the file even if reading fails; reading
        # stops at the first version line
        with open(candidates[0], 'r') as candles_file:
                for line in candles_file:
                        if line.startswith("#Version"):
                                return [a.strip() for a in line.split(":")][1]

        raise IndexError("no #Version line found in %s" % (candidates[0],))
00253 
00254 
if __name__ == "__main__":
        # run the doctests embedded in this module
        import doctest
        doctest.testmod()
        # developer smoke test against a local relval work directory; when the
        # directory does not exist only the file-name part of the JobID is filled
        path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
        print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))
00260 
00261         #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
00262 
00263 
00264