CMS 3D CMS Logo

FileNamesHelper.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import re, os
3 import parsingRulesHelper
4 
def f_candle_and_step_inJobID(candle, step, x):
    """Check only the first two parts of a JobID tuple: candle and step.

    `x` is any indexable JobID whose item 0 is the candle and item 1 the step;
    remaining items are ignored.  Returns True when both match.
    """
    return x[0] == candle and x[1] == step
7 
8 
9 """
10 Includes general functions to work with fileNames and related operations:
11 * getting candle, step etc - JobID from fileName and vice-versa
12  - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
13 * root file size from candle,step
14 * reads simulation candles to get release version
15 
16 """
17 
# Cache of compiled file-name regexps, keyed by the suffix pattern they end with.
universal_candle_step_regs = {}
# Matches names ending in a literal ".root" (case-insensitive).  The dot is
# escaped so that names such as "beetroot" are not mistaken for root files.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
20 
21 
22 """
23 We have Simulation candles lines in format like:
24 
25 cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
26 
27 """
# Each rule is (field names per regexp group, regexp[, "req"]).  An empty field
# name marks a group whose content is not stored; "req" presumably marks the
# field as required -- confirm against parsingRulesHelper.
simCandlesRules = (

    #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),
    #Changing the following to allow for new cmsDriver.py --conditions option (that can optionally drop the FrontierConditions_GlobalTag,)
    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
    #not sure if event content is required
    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

    #TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
    #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)
# The compiled rules are reused for every parsed line, so they must be a real
# list: under Python 3 a bare map() is a one-shot iterator that would be
# exhausted after the first line.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
44 
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Look up the cmsDriver.py configuration for a step in SimulationCandles.

    Parses the first regular file in `path` whose name starts with
    "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) line by line
    with the shared rules parser and returns the parsed info dictionary of the
    first line that

    * has no missing required fields,
    * has the requested `step` (compared after stripping whitespace), and
    * agrees with `is_pileup` on whether a pile-up type is set at all
      (only the truthiness of both values is compared).

    The "conditions" entry of the returned dictionary is normalised: an
    "auto:<key>" value is resolved through autoCond (dropping everything from
    "::" on), and a "FrontierConditions_GlobalTag,<tag>" value is reduced to
    the part after the first comma.

    Returns None when there is no SimulationCandles file or no line matches.
    Errors from listing `path` (OSError) are propagated to the caller.
    """
    # TODO: release, release_base [path] and "how to reproduce" are not parsed yet
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        return None

    # the context manager closes the file even if reading fails
    with open(candidates[0], 'r') as candles_file:
        lines = [raw.strip() for raw in candles_file]

    wanted_step = step.strip()
    want_pileup = bool(is_pileup)

    for line in lines:
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)

        # Lines that did not parse completely can never be returned, so skip
        # them before touching their fields (this also avoids importing
        # autoCond for lines that are discarded anyway).
        if missing_fields:
            continue

        # Massage the conditions entry to allow for the new cmsDriver.py
        # --conditions option (which may drop "FrontierConditions_GlobalTag,").
        conditions = info['conditions']
        if 'auto:' in conditions:
            from Configuration.AlCa.autoCond import autoCond
            info['conditions'] = autoCond[conditions.split(':')[1]].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in conditions:
            info['conditions'] = conditions.split(",")[1]

        # match on the step and on "pile-up or not"
        if info["step"].strip() == wanted_step and want_pileup == bool(info["pileup_type"]):
            return info

    return None
88 
89 
90 
91 
92 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    r"""
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of a file name; (None, None, None, None, None) if the name does not
    have the expected shape.

    `suffix` is a regular-expression fragment the file name must end with
    (it is interpolated into a re.VERBOSE pattern, so escape literal dots).
    `givenPath`, when non-empty, replaces the directory of `logfile_name` as
    the place to look for the SimulationCandles file.

    Dashes in the step part of the name are turned into commas.  When a
    SimulationCandles file is found and has a matching entry, pileup_type,
    conditions and event_content are taken from it; otherwise pileup_type is
    "PILEUP" or "" as indicated by the file name and the other two are "".

    The examples below show the result when no SimulationCandles file is
    available:

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    #TODO: join together with the one from parseTimingReport.py

    # get the actual filename (no path)
    (path, filename) = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    if suffix not in universal_candle_step_regs:
        # create and cache a regexp for this suffix
        universal_candle_step_regs[suffix] = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix, re.VERBOSE)

    result = universal_candle_step_regs[suffix].search(filename)
    if not result:
        return (None, None, None, None, None)

    candle, step, pileup_marker = result.groups()
    step = step.replace('-', ',')
    is_pileup = "PILEUP" if pileup_marker else ""

    # A candle without an inner underscore (TTBAR, unlike E_1000) is captured
    # with a trailing underscore, which has to be removed.
    if candle.endswith('_'):
        candle = candle[:-1]

    # Try to fetch the conditions and the real pile-up type from the
    # SimulationCandles file; an unreadable directory just means "no extra info".
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup= is_pileup)
    except OSError:
        conf = None
    if conf:
        is_pileup = conf["pileup_type"]
        conditions = conf["conditions"]
        event_content = conf["event_content"]

    return (candle, step, is_pileup, conditions, event_content)
165 
166 
def getJobID_fromRootFileName(logfile_name):
    r"""
    Returns the JobID of a "<candle>_<step>[_PILEUP].root" file name by
    delegating to getJobID_fromFileName with the ".root" suffix pattern.

    Results shown are for the case where no SimulationCandles file is found:

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    root_suffix = r"\.root"
    return getJobID_fromFileName(logfile_name, root_suffix)
184 
def getJobID_fromEdmSizeFileName(logfile_name):
    """
    Returns the JobID of a "<candle>_<step>[_PILEUP]_EdmSize" file name by
    delegating to getJobID_fromFileName with the "_EdmSize" suffix.

    Results shown are for the case where no SimulationCandles file is found:

    * the candle might include one optional underscore:
    >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
    ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_EdmSize")
202 
def getJobID_fromTimeReportLogName(logfile_name):
    r"""
    Returns the JobID of a "<candle>_<step>[_PILEUP]_TimingReport.log" file
    name by delegating to getJobID_fromFileName.

    Results shown are for the case where no SimulationCandles file is found
    next to the log file:

    * the candle might include one optional underscore:
    >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # the suffix is a regexp fragment: escape the dot so only ".log" matches
    return getJobID_fromFileName(logfile_name, r"_TimingReport\.log")
220 
def getJobID_fromMemcheckLogName(logfile_name):
    r"""
    Returns the JobID of a "<candle>_<step>[_PILEUP]_memcheck_vlgd.xml" file
    name by delegating to getJobID_fromFileName.

    Results shown are for the case where no SimulationCandles file is found
    next to the log file:

    * after candle we have two underscores:
    >>> getJobID_fromMemcheckLogName("test_data/TTBAR__RAW2DIGI,RECO_memcheck_vlgd.xml")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromMemcheckLogName("TTBAR__DIGI_PILEUP_memcheck_vlgd.xml")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # the suffix is a regexp fragment: escape the dot so only ".xml" matches
    return getJobID_fromFileName(logfile_name, r"_memcheck_vlgd\.xml")
234 
def getJobID_fromIgProfLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    for an IgProf .sql3 file name.

    The name carries its parts separated by triple underscores, like:
    TTbar___GEN,FASTSIM___LowLumiPileUp___MC_37Y_V5___RAWSIM___MEM_LIVE___1.sql3

    Only the first three parts are used: the candle (upper-cased), the step
    and the pile-up type ("NOPILEUP" is read as no pile-up, "LowLumiPileUp"
    as "PILEUP").  Pile-up type, conditions and event content of the result
    come from the SimulationCandles file in the same directory, not from the
    file name.

    Returns (None, None, None, None, None) when the name has fewer than three
    "___"-separated parts, when the directory cannot be listed, or when the
    SimulationCandles file has no matching entry.
    """
    no_job_id = (None, None, None, None, None)

    (path, filename) = os.path.split(logfile_name)

    params = filename.split("___")
    if len(params) < 3:
        # not an IgProf-style name: report "unknown" instead of an IndexError
        return no_job_id

    candle = params[0].upper()
    step = params[1]
    pileup_type = params[2]
    if pileup_type == "NOPILEUP":
        pileup_type = ""
    elif pileup_type == "LowLumiPileUp":
        pileup_type = "PILEUP"
    #conditions = params[3] + "::All"
    #event_content = params[4]

    # get the conditions from the SimulationCandles; an unreadable directory
    # is treated like a missing entry, as getJobID_fromFileName does
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup= pileup_type)
    except OSError:
        conf = None
    if not conf:
        return no_job_id

    return (candle, step, conf["pileup_type"], conf["conditions"], conf["event_content"])
268 
def getRootFileSize(path, candle, step):
    """Get the root file size for the candle, step in the directory `path`.

    Considers the regular files in `path` whose name matches test_root_file
    and returns the size in bytes (st_size) of the first one whose JobID has
    the given candle and step.  Returns 0, after printing the error, when no
    such file exists or anything goes wrong while looking.
    """
    root_files = []
    for name in os.listdir(path):
        full_name = os.path.join(path, name)
        if test_root_file.search(name) and os.path.isfile(full_name):
            root_files.append(full_name)

    # get the size of file if it is the root file for current candle and step
    try:
        # NOTE(review): the filter was missing from the listing this block was
        # recovered from; it is restored from the f_candle_and_step_inJobID
        # helper and the comment above -- confirm against the original source.
        size = [os.stat(f).st_size for f in root_files
                if f_candle_and_step_inJobID(candle, step, getJobID_fromRootFileName(f))][0]
    except Exception as e:
        # best effort: an IndexError here simply means "no matching file"
        print(e)
        return 0
    return size
284 
def read_SimulationCandles(path):
    """Return the release version recorded in the SimulationCandles file.

    Reads the first regular file in `path` whose name starts with
    "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) and returns
    the value of its first line starting with "#Version", which has the
    format "#Version : CMSSW_3_2_0" (the text between the first and second
    colon, stripped).

    Raises IndexError when there is no such file or it has no "#Version"
    line with a colon.
    """
    # TODO: release_base [path] and "how to reproduce" are not parsed yet
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    SimulationCandles_file = candidates[0]

    # the context manager closes the file even if reading fails
    with open(SimulationCandles_file, 'r') as candles_file:
        lines = candles_file.readlines()

    version_lines = [[part.strip() for part in line.split(":")]
                     for line in lines if line.startswith("#Version")]
    release_version = version_lines[0][1]
    return release_version
301 
302 
if __name__ == "__main__":
    # run the doctests embedded in this module's docstrings
    import doctest
    doctest.testmod()
    # developer-local sample directory, kept as a manual smoke test
    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    # single-argument print(...) gives the same output on Python 2 and 3
    print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))
308 
309  #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
310 
311 
312 
def read_SimulationCandles(path)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:314
def replace(string, replacements)
def getJobID_fromFileName(logfile_name, suffix, givenPath="")
def getJobID_fromIgProfLogName(logfile_name)
def getJobID_fromEdmSizeFileName(logfile_name)
def getRootFileSize(path, candle, step)
def getJobID_fromRootFileName(logfile_name)
def read_ConfigurationFromSimulationCandles(path, step, is_pileup)
def rulesParser(parsing_rules, lines, compileRules=True)
def getJobID_fromMemcheckLogName(logfile_name)
def getJobID_fromTimeReportLogName(logfile_name)
#define str(s)
double split
Definition: MVATrainer.cc:139