CMS 3D CMS Logo

FileNamesHelper.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import re, os
4 import parsingRulesHelper
5 
def f_candle_and_step_inJobID(candle, step, x):
    """Tell whether the JobID tuple *x* belongs to the given candle and step.

    Only the first two items of *x* (candle and step) are compared; any
    further items of the tuple are ignored.
    """
    return x[0] == candle and x[1] == step
8 
9 
10 """
11 Includes general functions to work with fileNames and related operations:
12 * getting candle, step etc - JobID from fileName and vice-versa
13  - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
14 * root file size from candle,step
15 * reads simulation candles to get release version
16 
17 """
18 
# Cache of compiled candle/step regular expressions, keyed by the file-name
# suffix pattern they were built for.
universal_candle_step_regs = {}
# Matches file names with a ".root" extension (case-insensitive). The dot is
# escaped so that names that merely end in "root" are not taken for root files.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
21 
22 
23 """
24 We have Simulation candles lines in format like:
25 
26 cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
27 
28 """
# Each rule is (field names, regular expression[, "req"]): one field name per
# regex group. NOTE(review): an empty field name presumably marks a group that
# is not stored and "req" a required field - confirm in parsingRulesHelper.
simCandlesRules = (

    # e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),
    # Allows the newer cmsDriver.py --conditions option, which can optionally
    # drop the "FrontierConditions_GlobalTag," prefix.
    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
    # not sure if event content is required
    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

    # TODO: after changing the splitter to "taskset -c ..." this is no longer included in the part of the correct job
    #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)
# Compile the rules once at import time. The result must be a list: the rules
# are iterated again for every parsed line, and under Python 3 a bare map()
# object would be exhausted after the first pass.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
45 
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Look up the cmsDriver.py configuration of a step in SimulationCandles.

    The first regular file in *path* whose name starts with
    "SimulationCandles_" is read and every line is parsed with
    simCandlesRules. The first line that has no missing required fields,
    whose step equals *step* (surrounding whitespace ignored) and whose
    pile-up presence agrees with *is_pileup* is returned.

    Args:
        path: directory in which the SimulationCandles_* file is searched.
        step: step name to look for, e.g. "DIGI".
        is_pileup: only its truthiness is used; a true value selects lines
            that carry a pile-up type, a false value lines without one.

    Returns:
        The dictionary of parsed fields of the matching line, or None when
        there is no SimulationCandles_* file or no line matches. In the
        returned dictionary "conditions" is normalised: an "auto:<key>" value
        is resolved through autoCond (part before "::"), and a
        "FrontierConditions_GlobalTag,<tag>" value is reduced to "<tag>".

    Errors raised by os.listdir() or open() are not caught here.
    """
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        return None

    with open(candidates[0], 'r') as candles_file:
        lines = [raw.strip() for raw in candles_file]

    for line in lines:
        # The shared helper parses one line at a time with the pre-compiled rules.
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)
        if missing_fields:
            # Not a (complete) cmsDriver.py line.
            continue
        if info["step"].strip() != step.strip():
            continue
        if bool(is_pileup) != bool(info["pileup_type"]):
            continue

        # Only the line that is actually returned gets its conditions
        # normalised, so unrelated lines can neither trigger the autoCond
        # import nor fail on an unexpected conditions format.
        conditions = info['conditions']
        if 'auto:' in conditions:
            from Configuration.AlCa.autoCond import autoCond
            conditions = autoCond[conditions.split(':')[1]].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in conditions:
            parts = conditions.split(",")
            # Without a comma there is no tag to extract; keep the value as is.
            if len(parts) > 1:
                conditions = parts[1]
        info['conditions'] = conditions
        return info

    return None
89 
90 
91 
92 
93 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    r"""Return the JobID encoded in a file name.

    The JobID is the tuple (candle, step, pileup_type, conditions,
    event_content). Candle, step and the PILEUP marker come from the file
    name; conditions, event content and the real pile-up type are taken from
    the SimulationCandles file when one can be read, otherwise conditions and
    event content stay empty strings.

    Args:
        logfile_name: file name, optionally with a directory part.
        suffix: regular-expression fragment that must follow the step (and
            the optional "_PILEUP") at the end of the file name.
        givenPath: directory to search for the SimulationCandles file; when
            empty, the directory part of *logfile_name* is used.

    Returns:
        The 5-tuple described above, or a tuple of five None values when the
        file name does not match the expected layout.

    The examples below assume that no SimulationCandles_* file is available.

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    #TODO: join together with the one from parseTimingReport.py

    # Only the base name carries the JobID; the directory tells where to look
    # for the SimulationCandles file.
    path, filename = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    pattern = universal_candle_step_regs.get(suffix)
    if pattern is None:
        # Create and cache one regular expression per suffix.
        pattern = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix, re.VERBOSE)
        universal_candle_step_regs[suffix] = pattern

    result = pattern.search(filename)
    if not result:
        return (None, None, None, None, None)

    candle = result.group(1)
    # Steps are written with '-' in file names and with ',' elsewhere.
    step = result.group(2).replace('-', ',')
    is_pileup = "PILEUP" if result.group(3) else ""

    # A candle without an inner underscore (TTBAR rather than E_1000) is
    # captured together with one of the two separating underscores.
    if candle.endswith('_'):
        candle = candle[:-1]

    # Try to fetch the conditions and the real pile-up type; a missing or
    # unreadable directory is not an error, the file-name data is kept.
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
    except OSError:
        conf = None
    if conf:
        is_pileup = conf["pileup_type"]
        conditions = conf["conditions"]
        event_content = conf["event_content"]

    return (candle, step, is_pileup, conditions, event_content)
166 
167 
def getJobID_fromRootFileName(logfile_name):
    """Return the JobID encoded in the name of a ".root" file.

    Delegates to getJobID_fromFileName() and returns its 5-tuple (candle,
    step, pileup_type, conditions, event_content). The examples assume that
    no SimulationCandles_* file is available.

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, r"\.root")
185 
def getJobID_fromEdmSizeFileName(logfile_name):
    """Return the JobID encoded in the name of an "_EdmSize" file.

    Delegates to getJobID_fromFileName() and returns its 5-tuple (candle,
    step, pileup_type, conditions, event_content). The examples assume that
    no SimulationCandles_* file is available.

    * the candle might include one optional underscore:
    >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
    ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_EdmSize")
203 
def getJobID_fromTimeReportLogName(logfile_name):
    """Return the JobID encoded in the name of a "_TimingReport.log" file.

    Delegates to getJobID_fromFileName() and returns its 5-tuple (candle,
    step, pileup_type, conditions, event_content). The examples assume that
    no SimulationCandles_* file is available next to the log.

    * the candle might include one optional underscore:
    >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # The suffix is a regular-expression fragment, so the dot is escaped.
    return getJobID_fromFileName(logfile_name, r"_TimingReport\.log")
221 
def getJobID_fromMemcheckLogName(logfile_name):
    """Return the JobID encoded in the name of a "_memcheck_vlgd.xml" file.

    Delegates to getJobID_fromFileName() and returns its 5-tuple (candle,
    step, pileup_type, conditions, event_content). The examples assume that
    no SimulationCandles_* file is available next to the log.

    * after candle we have two underscores:
    >>> getJobID_fromMemcheckLogName("test_data/TTBAR__RAW2DIGI,RECO_memcheck_vlgd.xml")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromMemcheckLogName("TTBAR__DIGI_PILEUP_memcheck_vlgd.xml")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # The suffix is a regular-expression fragment, so the dot is escaped.
    return getJobID_fromFileName(logfile_name, r"_memcheck_vlgd\.xml")
235 
def getJobID_fromIgProfLogName(logfile_name):
    """Return the JobID encoded in the name of an IgProf .sql3 file.

    The file name lists its parts separated by three underscores, like:
    TTbar___GEN,FASTSIM___LowLumiPileUp___MC_37Y_V5___RAWSIM___MEM_LIVE___1.sql3

    Only candle (upper-cased), step and pile-up type are read from the name.
    Pile-up type, conditions and event content of the result come from the
    SimulationCandles file in the directory of *logfile_name*.

    Returns:
        (candle, step, pileup_type, conditions, event_content), or a tuple of
        five None values when the name has fewer than three "___"-separated
        parts or no matching SimulationCandles entry is found.

    Errors raised while reading the SimulationCandles directory are not
    caught here.
    """
    path, filename = os.path.split(logfile_name)

    params = filename.split("___")
    if len(params) < 3:
        # Not an IgProf-style name: report "no JobID" instead of failing on
        # the missing parts.
        return (None, None, None, None, None)

    candle = params[0].upper()
    step = params[1]
    # Translate the names used in the file name to the PILEUP / "" markers;
    # any other value is passed on unchanged.
    pileup_type = {"NOPILEUP": "", "LowLumiPileUp": "PILEUP"}.get(params[2], params[2])

    # The conditions and event content embedded in the file name are not
    # used; they are taken from the SimulationCandles file instead.
    conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = pileup_type)
    if not conf:
        return (None, None, None, None, None)

    return (candle, step, conf["pileup_type"], conf["conditions"], conf["event_content"])
269 
def getRootFileSize(path, candle, step):
    """Get the root file size for the candle and step in directory *path*.

    Returns the size (os.stat().st_size) of the first root file in *path*
    whose JobID matches *candle* and *step*. If there is no such file, or
    anything goes wrong while looking for it, the error is printed and 0 is
    returned. os.listdir() errors on *path* itself are not caught.
    """
    files = os.listdir(path)
    root_files = [os.path.join(path, f) for f in files
                  if test_root_file.search(f)
                  and os.path.isfile(os.path.join(path, f))]

    # Take the size of the file if it is the root file for the current
    # candle and step; indexing the empty list (no match) lands in the
    # fallback below.
    try:
        size = [os.stat(f).st_size for f in root_files
                if f_candle_and_step_inJobID(candle, step, getJobID_fromRootFileName(f))][0]
    except Exception as e:
        print(e)
        return 0
    return size
285 
def read_SimulationCandles(path):
    """Return the release version recorded in the SimulationCandles file.

    The first regular file in *path* whose name starts with
    "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) is read and
    the value of its first "#Version" line is returned; the line has the
    format "#Version : CMSSW_3_2_0".

    Raises:
        IndexError: if *path* holds no SimulationCandles_* file, or the file
            has no "#Version" line with a ":"-separated value.
    """
    # TODO: release, release_base [path] - we can put it to release [but it's of different granularity]
    # TODO: how to reproduce stuff

    candles_files = [os.path.join(path, name) for name in os.listdir(path)
                     if os.path.isfile(os.path.join(path, name))
                     and name.startswith("SimulationCandles_")]

    with open(candles_files[0], 'r') as candles_file:
        version_lines = [line for line in candles_file if line.startswith("#Version")]

    release_version = [part.strip() for part in version_lines[0].split(":")][1]
    return release_version
302 
303 
if __name__ == "__main__":
    # Run the doctests embedded in the docstrings of this module.
    import doctest
    doctest.testmod()
    # Developer smoke test against a local copy of relval output; the
    # directory only exists on the original author's machine.
    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    print("Job ID: " + str(getJobID_fromTimeReportLogName(os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log"))))
309 
310  #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
311 
312 
313 
def read_SimulationCandles(path)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:314
def replace(string, replacements)
def getJobID_fromFileName(logfile_name, suffix, givenPath="")
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
def getJobID_fromIgProfLogName(logfile_name)
def getJobID_fromEdmSizeFileName(logfile_name)
def getRootFileSize(path, candle, step)
def getJobID_fromRootFileName(logfile_name)
def read_ConfigurationFromSimulationCandles(path, step, is_pileup)
def rulesParser(parsing_rules, lines, compileRules=True)
def getJobID_fromMemcheckLogName(logfile_name)
def getJobID_fromTimeReportLogName(logfile_name)
#define str(s)
double split
Definition: MVATrainer.cc:139