CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
FileNamesHelper.py
Go to the documentation of this file.
1 #!/usr/bin/env python2.4
2 import re, os
3 import parsingRulesHelper
4 
def f_candle_and_step_inJobID(candle, step, x):
    """Check whether a JobID tuple belongs to the given candle and step.

    Only the first two items of ``x`` are inspected (``x[0]`` against
    ``candle`` and ``x[1]`` against ``step``); any further items of the
    JobID are ignored.

    Returns the result of ``x[0] == candle and x[1] == step``.
    """
    return x[0] == candle and x[1] == step
7 
8 
9 """
10 Includes general functions to work with fileNames and related operations:
11 * getting candle, step etc - JobID from fileName and vice-versa
12  - includes conditions, pileup_type, event_content <-- read this from Simulationcandles [TODO: we have it in this module for simplicity, might be moved later]
13 * root file size from candle,step
14 * reads simulation candles to get release version
15 
16 """
17 
# Cache of compiled file-name regexps, keyed by the suffix pattern they were
# built for (filled lazily by getJobID_fromFileName).
universal_candle_step_regs = {}

# Matches names ending in ".root", ignoring case.  The dot is escaped so that
# only a literal "." is accepted; unescaped it would match any character and
# names such as "taproot" would be mistaken for ROOT files.
test_root_file = re.compile(r"\.root$", re.IGNORECASE)
20 
21 
22 """
23 We have Simulation candles lines in format like:
24 
25 cmsDriver.py TTbar_Tauola.cfi -n 100 --step=DIGI --filein file:TTBAR__GEN,SIM_PILEUP.root --fileout=TTBAR__DIGI_PILEUP.root --customise=Validation/Performance/MixingModule.py --conditions FrontierConditions_GlobalTag,MC_31X_V3::All --eventcontent FEVTDEBUG --pileup=LowLumiPileUp @@@ Timing_Parser @@@ TTBAR__DIGI_PILEUP_TimingReport @@@ reuse
26 
27 """
# Parsing rules applied to each line of a SimulationCandles file.
# Every rule is a tuple: (field names, one per regexp group; regexp; optional "req" flag).
# NOTE(review): an empty field name presumably marks a group whose text is not
# stored, and "req" presumably marks the field as mandatory -- both are
# interpreted by parsingRulesHelper, which is not visible here.
simCandlesRules = (

    # everything following the cmsDriver.py command itself
    # e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    (("cms_driver_options", ), r"""^cmsDriver.py(.+)$"""),

    # the value is taken as one whitespace-free token, so the newer cmsDriver.py
    # --conditions form (which may drop the "FrontierConditions_GlobalTag," part)
    # is accepted as well
    (("", "conditions", ""), r"""^cmsDriver.py(.*)--conditions ([^\s]+)(.*)$""", "req"),

    # pile-up is optional: lines without --pileup= are still valid
    (("", "pileup_type", ""), r"""^cmsDriver.py(.*)--pileup=([^\s]+)(.*)$"""),

    (("", "step", ""), r"""^cmsDriver.py(.*)--step=([^\s]+)(.*)$""", "req"),

    # not sure whether the event content really has to be required
    (("", "event_content", ""), r"""^cmsDriver.py(.*)--eventcontent ([^\s]+)(.*)$""", "req"),

    (("", "num_events", ""), r"""^cmsDriver.py(.*)-n ([^\s]+)(.*)$""", "req"),

    # TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
    # (("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)

# Replace the raw table by its compiled form, once, at import time.
simCandlesRules = list(map(parsingRulesHelper.rulesRegexpCompileFunction, simCandlesRules))
44 
def read_ConfigurationFromSimulationCandles(path, step, is_pileup):
    """Find the cmsDriver.py settings of a step in a SimulationCandles file.

    The first regular file in ``path`` whose name starts with
    "SimulationCandles_" (e.g. SimulationCandles_CMSSW_3_2_0.txt) is read, and
    every line is parsed on its own with the module-level ``simCandlesRules``.

    Arguments:
      path      -- directory that is searched for the SimulationCandles file
      step      -- step name; compared, after stripping whitespace, with the
                   value of the --step= option of each line
      is_pileup -- truthy to select a line that has a --pileup= value,
                   falsy to select a line that has none

    Returns the info dictionary built by parsingRulesHelper.rulesParser for the
    first matching line, with its 'conditions' entry reduced to the bare tag,
    or None when there is no SimulationCandles file or no line matches.

    Errors raised by os.listdir() or open() are not handled here; they
    propagate to the caller.
    """
    # TODO: the file also contains release / release_base information and the
    # way to reproduce the job; none of that is extracted yet.

    # Pick the candles file; there is nothing to parse if the directory has none.
    candidates = [os.path.join(path, name) for name in os.listdir(path)
                  if name.startswith("SimulationCandles_")
                  and os.path.isfile(os.path.join(path, name))]
    if not candidates:
        return None

    # try/finally so the handle is released even if reading fails.
    candles_file = open(candidates[0], 'r')
    try:
        lines = [raw.strip() for raw in candles_file.readlines()]
    finally:
        candles_file.close()

    wanted_step = step.strip()

    for line in lines:
        # The shared helper parses one line at a time with the precompiled rules.
        info, missing_fields = parsingRulesHelper.rulesParser(simCandlesRules, [line], compileRules = False)

        # A line lacking any required field is not a usable cmsDriver.py line;
        # skip it before touching its entries.
        if missing_fields:
            continue

        # Normalise 'conditions' to allow for the newer cmsDriver.py --conditions forms.
        conditions = info['conditions']
        if 'auto:' in conditions:
            # "auto:<key>" is resolved through the autoCond table; imported
            # lazily because it is only needed for this form.
            from Configuration.PyReleaseValidation.autoCond import autoCond
            info['conditions'] = autoCond[conditions.split(':')[1]].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in conditions:
            # "FrontierConditions_GlobalTag,<tag>" -> "<tag>"
            info['conditions'] = conditions.split(",")[1]

        # NOTE(review): pileup_type is an optional rule; how rulesParser reports
        # an absent optional field is not visible here, so default it to "".
        pileup_type = info.setdefault("pileup_type", "")

        # The line must be for the requested step, and must have a pile-up
        # type exactly when a pile-up job was requested.
        if info["step"].strip() == wanted_step and bool(is_pileup) == bool(pileup_type):
            return info

    return None
87 
88 
89 
90 
91 
def getJobID_fromFileName(logfile_name, suffix, givenPath =""):
    #TODO: join together with the one from parseTimingReport.py
    r"""
    Returns the JobID (candle, step, pileup_type, conditions, event_content) out of filename
    -- if no pile up returns empty string for pileup type
    -- if the name does not match, returns (None, None, None, None, None)

    Arguments:
      logfile_name -- file name, optionally with a leading directory
      suffix       -- regular expression fragment the file name must end with
                      (it is inserted into the pattern as is, so escape
                      literal dots)
      givenPath    -- directory to search for the SimulationCandles file;
                      defaults to the directory part of logfile_name

    conditions and event_content (and the real pileup type) are read from the
    SimulationCandles file when one is found; otherwise they stay empty, as in
    the examples below, and pileup_type is just 'PILEUP' or ''.

    * the candle might include one optional underscore:
    >>> getJobID_fromFileName("PI-_1000_GEN,SIM.root", "\.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromFileName("MINBIAS__GEN,FASTSIM.root", "\.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromFileName("TTBAR__DIGI_PILEUP.root", "\.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    # get the actual filename (no path); the directory is only used to look
    # for the SimulationCandles file
    (path, filename) = os.path.split(logfile_name)
    if givenPath:
        path = givenPath

    if suffix not in universal_candle_step_regs:
        # create and cache a regexp for this suffix
        universal_candle_step_regs[suffix] = re.compile(
            r"""
            #candle1_[opt:candle2]_
            ^([^_]+_[^_]*)_

            # step
            ([^_]+)(_PILEUP)?%s$
            """ % suffix, re.VERBOSE)

    result = universal_candle_step_regs[suffix].search(filename)
    if not result:
        return (None, None, None, None, None)

    candle, step, pileup_marker = result.groups()
    if pileup_marker:
        is_pileup = "PILEUP"
    else:
        is_pileup = ""

    # A candle without an inner underscore (TTBAR, unlike E_1000) is captured
    # with the first of the two separating underscores attached; drop it.
    if candle.endswith('_'):
        candle = candle[:-1]

    # Try to fetch the conditions, event content and real pileup type from
    # the SimulationCandles file, if one exists.
    conditions = ''
    event_content = ''
    try:
        conf = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = is_pileup)
        if conf:
            is_pileup = conf["pileup_type"]
            conditions = conf["conditions"]
            event_content = conf["event_content"]
    except OSError:
        # Deliberately best effort: an unreadable or missing directory just
        # leaves the values derived from the file name.
        pass

    return (candle, step, is_pileup, conditions, event_content)
164 
165 
def getJobID_fromRootFileName(logfile_name):
    """
    Returns the JobID (candle, step, pileup_type, conditions, event_content)
    out of the name of a .root file, by delegating to getJobID_fromFileName.

    The last two items are empty in the examples below because no
    SimulationCandles file is found next to the given names.

    * the candle might include one optional underscore:
    >>> getJobID_fromRootFileName("PI-_1000_GEN,SIM.root")
    ('PI-_1000', 'GEN,SIM', '', '', '')

    * otherwise after candle we have two underscores:
    >>> getJobID_fromRootFileName("MINBIAS__GEN,FASTSIM.root")
    ('MINBIAS', 'GEN,FASTSIM', '', '', '')

    * and lastly we have the PILEUP possibility:
    >>> getJobID_fromRootFileName("TTBAR__DIGI_PILEUP.root")
    ('TTBAR', 'DIGI', 'PILEUP', '', '')
    """
    return getJobID_fromFileName(logfile_name, "\\.root")
183 
185  """
186  Returns the candle and STEP out of filename:
187 
188  * the candle might include one optional underscore:
189  >>> getJobID_fromEdmSizeFileName("E_1000_GEN,SIM_EdmSize")
190  ('E_1000', 'GEN,SIM', '', '')
191 
192  * otherwise after candle we have two underscores:
193  >>> getJobID_fromEdmSizeFileName("TTBAR__RAW2DIGI,RECO_EdmSize")
194  ('TTBAR', 'RAW2DIGI,RECO', '', '')
195 
196  * and lastly we have the PILEUP possibility:
197  >>> getJobID_fromEdmSizeFileName("TTBAR__GEN,SIM_PILEUP_EdmSize")
198  ('TTBAR', 'GEN,SIM', 'PILEUP', '')
199  """
200  return getJobID_fromFileName(logfile_name, "_EdmSize")
201 
203  """
204  Returns the candle and STEP out of filename:
205 
206  * the candle might include one optional underscore:
207  >>> getJobID_fromTimeReportLogName("E_1000_GEN,SIM_TimingReport.log")
208  ('E_1000', 'GEN,SIM', '', '')
209 
210  * otherwise after candle we have two underscores:
211  >>> getJobID_fromTimeReportLogName("test_data/TTBAR__RAW2DIGI,RECO_TimingReport.log")
212  ('TTBAR', 'RAW2DIGI,RECO', '', '')
213 
214  * and lastly we have the PILEUP possibility:
215  >>> getJobID_fromTimeReportLogName("TTBAR__DIGI_PILEUP_TimingReport.log")
216  ('TTBAR', 'DIGI', 'PILEUP', '')
217  """
218  return getJobID_fromFileName(logfile_name, "_TimingReport.log")
219 
220 
221 """ Get the root file size for the candle, step in current dir """
222 def getRootFileSize(path, candle, step):
223  files = os.listdir(path)
224  root_files = [os.path.join(path, f) for f in files
225  if test_root_file.search(f)
226  and os.path.isfile(os.path.join(path, f)) ]
227 
228  """ get the size of file if it is the root file for current candle and step """
229  try:
230  size = [os.stat(f).st_size for f in root_files
232  except Exception, e:
233  print e
234  return 0
235  return size
236 
238  # Here we parse SimulationCandles_<version: e.g. CMSSW_3_2_0>.txt which contains
239  # release:TODO, release_base [path] - we can put it to release [but it's of different granularity]
240  # how to reproduce stuff: TODO
241 
242  """ get the acual file """
243  SimulationCandles_file = [os.path.join(path, f) for f in os.listdir(path)
244  if os.path.isfile(os.path.join(path, f)) and f.startswith("SimulationCandles_")][0]
245 
246  """ read and parse it; format: #Version : CMSSW_3_2_0 """
247  f = open(SimulationCandles_file, 'r')
248  lines = f.readlines()
249  f.close()
250 
251  release_version =[[a.strip() for a in line.split(":")] for line in lines if line.startswith("#Version")][0][1]
252  return release_version
253 
254 
if __name__ == "__main__":
    # Self-check when run as a script: first the doctests embedded in this
    # module, then one lookup against a developer's local relval directory.
    import doctest
    doctest.testmod()

    path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI/TTbar_PU_TimeSize"
    timing_log = os.path.join(path, "TTBAR__DIGI_PILEUP_TimingReport.log")
    job_id = getJobID_fromTimeReportLogName(timing_log)
    print("Job ID: " + str(job_id))
260 
261  #read_ConfigurationFromSimulationCandles(, step = "DIGI", is_pileup= "PILEUP")
262 
263 
264 
def getJobID_fromTimeReportLogName
void strip(std::string &input, const std::string &blanks=" \n\t")
Definition: stringTools.cc:16
dictionary map
Definition: Association.py:160
list f_candle_and_step_inJobID
def read_ConfigurationFromSimulationCandles
double split
Definition: MVATrainer.cc:139
def getJobID_fromEdmSizeFileName