CMS 3D CMS Logo

FileNamesHelper.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 from __future__ import print_function
3 from __future__ import absolute_import
4 import re, os
5 from . import parsingRulesHelper
6 
def f_candle_and_step_inJobID(candle, step, x):
    """Check only the first two parts of a JobID tuple: candle and step.

    Returns True when ``x[0]`` equals *candle* and ``x[1]`` equals *step*;
    any further items of *x* are ignored.
    """
    return x[0] == candle and x[1] == step
9 
10 
11 """
12 Includes general functions to work with fileNames and related operations:
13 * getting candle, step etc - JobID from fileName and vice-versa
14  - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
15 * root file size from candle,step
16 * reads simulation candles to get release version
17 
18 """
19 
# Cache of compiled file-name regexps, keyed by the suffix pattern they were
# built for (filled lazily by getJobID_fromFileName).
universal_candle_step_regs = {}
# Matches names ending in ".root", case-insensitively.  The dot is escaped so
# that it only matches a literal "." (an unescaped "." matches any character).
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
22 
23 
24 """
25 We have Simulation candles lines in format like:
26 
27 cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
28 
29 """
# Each rule is a tuple: (field names for the regexp groups, regexp[, "req"]).
# An empty field name means the corresponding group is not stored.
# NOTE(review): "req" presumably marks the field as required (reported in
# missing_fields by parsingRulesHelper.rulesParser) -- confirm in that module.
simCandlesRules = (

    #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),
    #Changing the following to allow for new cmsDriver.py --conditions option (that can optionally drop the FrontierConditions_GlobalTag,)
    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),
    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),
    #not sure if event content is required
    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

    #TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
    #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)
# Pre-compile the rules once.  The result must be a real list: on Python 3
# map() returns a one-shot iterator, and these rules are re-used for every
# line parsed in read_ConfigurationFromSimulationCandles.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
46 
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Look up the cmsDriver configuration for a step in SimulationCandles_*.

    Parses the first ``SimulationCandles_<version>`` file (e.g. version
    CMSSW_3_2_0) found in *path*, line by line, with the module-level
    ``simCandlesRules``.

    Parameters:
        path      -- directory expected to contain the SimulationCandles_* file
        step      -- step name to look for (compared after stripping whitespace)
        is_pileup -- truthy when the pile-up variant of the step is wanted

    Returns the parsed ``info`` dictionary of the first line that has no
    missing required fields, the requested step, and a pile-up setting whose
    truthiness agrees with *is_pileup*.  Returns None when there is no
    SimulationCandles_* file or no line matches.

    Raises OSError when *path* cannot be listed or the file cannot be read.
    """
    # release:TODO, release_base [path] - we can put it to release [but it's of different granularity]
    # how to reproduce stuff: TODO

    # get the actual file
    candidates = [os.path.join(path, f) for f in os.listdir(path)
                  if os.path.isfile(os.path.join(path, f)) and f.startswith("SimulationCandles_")]
    if not candidates:
        return None

    # read it; the context manager closes the file even if reading fails
    with open(candidates[0], 'r') as f:
        lines = [s.strip() for s in f.readlines()]

    # we call a shared helper to parse the file, one line at a time
    for line in lines:
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)
        #Massaging the info dictionary conditions entry to allow for new cmsDriver.py --conditions option:
        if 'auto:' in info['conditions']:
            # "auto:<key>" is resolved through the autoCond mapping; imported
            # lazily so it is only required when such a line is encountered
            from Configuration.AlCa.autoCond import autoCond
            info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in info['conditions']:
            # old style "FrontierConditions_GlobalTag,<tag>": keep the part after the comma
            info['conditions'] = info['conditions'].split(",")[1]

        #if we successfully parsed the line of simulation candles:
        if missing_fields:
            continue
        #we have to match only the step and whether it is pile-up or not
        same_step = info["step"].strip() == step.strip()
        same_pileup = bool(is_pileup) == bool(info["pileup_type"])
        if same_step and same_pileup:
            return info

    return None
90 
91 
92 
93 
94 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    #TODO: join together with the one from parseTimingReport.py
    r"""
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of a file name, or a tuple of five None values when the name does not
    have the expected ``<candle>_<step>[_PILEUP]<suffix>`` shape.

    logfile_name -- file name, optionally with a leading directory
    suffix       -- regular-expression fragment the file name must end with
    givenPath    -- if non-empty, directory searched for the
                    SimulationCandles_* file instead of the directory part of
                    logfile_name

    Any '-' in the step part of the file name is turned into ','.

    pileup_type, conditions and event_content are taken from the
    SimulationCandles_* file when one is found and has a matching entry.
    Otherwise pileup_type is 'PILEUP' or '' according to the file name and the
    last two items are empty strings (as in the examples below, where no
    SimulationCandles_* file is available).

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", r"\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", r"\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", r"\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # get the actual filename (no path)
    (path, filename) = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    if suffix not in universal_candle_step_regs:
        #create and cache a regexp
        universal_candle_step_regs[suffix] = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix, re.VERBOSE)

    result = universal_candle_step_regs[suffix].search(filename)
    if not result:
        return (None, None, None, None, None)

    candle, step, pileup_marker = result.groups()
    step = step.replace('-', ',')
    is_pileup = "PILEUP" if pileup_marker else ""

    # if we had the candle without underscore inside (like TTBAR but not
    # E_1000), the match ends with an underscore which needs to be removed
    if candle.endswith('_'):
        candle = candle[:-1]

    # try to fetch the conditions and real pileup type if the
    # SimulationCandles file exists
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
    except OSError:
        # best effort: the directory may be missing or unreadable, in which
        # case only the information from the file name is returned
        conf = None
    if conf:
        is_pileup = conf["pileup_type"]
        conditions = conf["conditions"]
        event_content = conf["event_content"]

    return (candle, step, is_pileup, conditions, event_content)
167 
168 
def getJobID_fromRootFileName(logfile_name):
    """
    Returns the JobID for a ".root" file name.

    Delegates to getJobID_fromFileName with the ".root" suffix pattern and
    returns its 5-tuple (candle, step, pileup_type, conditions, event_content)
    unchanged.  The examples show the result when no SimulationCandles_* file
    is available, so the last two items are empty:

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    job_id = getJobID_fromFileName(logfile_name, suffix="\\.root")
    return job_id
186 
def getJobID_fromEdmSizeFileName(logfile_name):
    """
    Returns the JobID for an "_EdmSize" file name.

    Delegates to getJobID_fromFileName with the "_EdmSize" suffix and returns
    its 5-tuple (candle, step, pileup_type, conditions, event_content)
    unchanged.  The examples show the result when no SimulationCandles_* file
    is available, so the last two items are empty:

    * the candle might include one optional underscore:
    >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
    ('TTBAR', 'GEN,SIM', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_EdmSize")
204 
def getJobID_fromTimeReportLogName(logfile_name):
    """
    Returns the JobID for a "_TimingReport.log" file name.

    Delegates to getJobID_fromFileName with the "_TimingReport.log" suffix and
    returns its 5-tuple (candle, step, pileup_type, conditions, event_content)
    unchanged.  The examples show the result when no SimulationCandles_* file
    is available in the file's directory, so the last two items are empty:

    * the candle might include one optional underscore:
    >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
    ('E_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores
      (assumes test_data/ holds no SimulationCandles_* file):
    >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_TimingReport.log")
222 
def getJobID_fromMemcheckLogName(logfile_name):
    """
    Returns the JobID for a "_memcheck_vlgd.xml" file name.

    Delegates to getJobID_fromFileName with the "_memcheck_vlgd.xml" suffix
    and returns its 5-tuple (candle, step, pileup_type, conditions,
    event_content) unchanged.  The examples show the result when no
    SimulationCandles_* file is available in the file's directory, so the
    last two items are empty:

    * after candle we have two underscores
      (assumes test_data/ holds no SimulationCandles_* file):
    >>> getJobID_fromMemcheckLogName("test_data/TTBAR__RAW2DIGI,RECO_memcheck_vlgd.xml")
    ('TTBAR', 'RAW2DIGI,RECO', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromMemcheckLogName("TTBAR__DIGI_PILEUP_memcheck_vlgd.xml")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "_memcheck_vlgd.xml")
236 
def getJobID_fromIgProfLogName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of an IgProf .sql3 file name.

    Everything is given in the name, it just has to be split on "___", like:
    TTbar___GEN,FASTSIM___LowLumiPileUp___MC_37Y_V5___RAWSIM___MEM_LIVE___1.sql3

    Only the candle (upper-cased), the step and the pile-up field are taken
    from the name; pileup_type, conditions and event_content in the result
    come from the SimulationCandles_* file in the same directory.

    Returns a tuple of five None values when the name has fewer than three
    "___"-separated fields or when no matching SimulationCandles entry is
    found.
    """

    (path, filename) = os.path.split(logfile_name)

    params = filename.split("___")
    if len(params) < 3:
        # not an IgProf-style name: report "no JobID" instead of failing
        # with an IndexError on the field accesses below
        return (None, None, None, None, None)

    candle = params[0].upper()
    step = params[1]
    pileup_type = params[2]
    # normalise the pile-up field to the '' / 'PILEUP' flag used for the lookup
    if pileup_type == "NOPILEUP":
        pileup_type = ""
    elif pileup_type == "LowLumiPileUp":
        pileup_type = "PILEUP"
    #conditions = params[3] + "::All"
    #event_content = params[4]

    #get the conditions from the SimulationCandles!!
    conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = pileup_type)
    if not conf:
        return (None, None, None, None, None)

    return (candle, step, conf["pileup_type"], conf["conditions"], conf["event_content"])
270 
271 """ Get the root file size for the candle, step in current dir """
272 def getRootFileSize(path, candle, step):
273  files = os.listdir(path)
274  root_files = [os.path.join(path, f) for f in files
275  if test_root_file.search(f)
276  and os.path.isfile(os.path.join(path, f)) ]
277 
278  """ get the size of file if it is the root file for current candle and step """
279  try:
280  size = [os.stat(f).st_size for f in root_files
282  except Exception as e:
283  print(e)
284  return 0
285  return size
286 
def read_SimulationCandles(path):
    """Return the release version recorded in the SimulationCandles_* file.

    Reads the first ``SimulationCandles_<version>`` file (e.g. version
    CMSSW_3_2_0) found in the directory *path* and returns the text after the
    first ':' of its first line starting with "#Version"
    (format: ``#Version : CMSSW_3_2_0``), stripped of whitespace.

    Raises IndexError when the directory has no SimulationCandles_* file or
    the file has no "#Version" line, and OSError when the directory or file
    cannot be read.
    """
    # release:TODO, release_base [path] - we can put it to release [but it's of different granularity]
    # how to reproduce stuff: TODO

    # get the actual file
    SimulationCandles_file = [os.path.join(path, f) for f in os.listdir(path)
                              if os.path.isfile(os.path.join(path, f)) and f.startswith("SimulationCandles_")][0]

    # read it; the context manager closes the file even if reading fails
    with open(SimulationCandles_file, 'r') as f:
        lines = f.readlines()

    version_lines = [[a.strip() for a in line.split(":")] for line in lines if line.startswith("#Version")]
    release_version = version_lines[0][1]
    return release_version
303 
304 
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings first.
    import doctest
    doctest.testmod()
    # Then print the JobID parsed from a sample timing-report log name located
    # in a developer's local results directory (hard-coded path).
    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    sample_log = os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log")
    print("Job ID: " + str(getJobID_fromTimeReportLogName(sample_log)))
310 
311  #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
312 
313 
314 
def read_SimulationCandles(path)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:314
def replace(string, replacements)
def getJobID_fromFileName(logfile_name, suffix, givenPath="")
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def getJobID_fromIgProfLogName(logfile_name)
def getJobID_fromEdmSizeFileName(logfile_name)
def getRootFileSize(path, candle, step)
def getJobID_fromRootFileName(logfile_name)
def read_ConfigurationFromSimulationCandles(path, step, is_pileup)
def rulesParser(parsing_rules, lines, compileRules=True)
def getJobID_fromMemcheckLogName(logfile_name)
def getJobID_fromTimeReportLogName(logfile_name)
#define str(s)
double split
Definition: MVATrainer.cc:139