CMS 3D CMS Logo

parserPerfsuiteMetadata.py
Go to the documentation of this file.
1 import re
2 import os, sys
3 import time
4 import parsingRulesHelper
5 import glob
6 from commands import getstatusoutput
7 
9  """
10  The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
11 
12  * General info
13  As most of the info consists of simple one-line strings, we define regular expressions matching each of those lines. Each regular expression is associated with the data we can get from it, e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time the suite started and on which machine. It is associated with a tuple of field names for the general info which will be filled in; in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling the simple function _applyParsingRules, which checks each line against each rule and, when a rule matches, fills in the result dictionary with the matched values.
14  Additionally we get the CPU and memory info from /proc/cpuinfo and /proc/meminfo
15 
16  * TimeSize test
17  We use the same technique here as well. But first we divide the timesize lines by job (an individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules; we also find the starting and ending times (i.e. we know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:")
18 
19  * All other tests
20  We find the line stating that the test is being launched (containing the test name, core and number of events). Above we have the thread number, and below the starting time.
21  The ending time can ONLY be connected with the starting time by the Thread-ID. The problem is that the log file names the same test instance differently at start and at end, like <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>.
22  """
23  _LINE_SEPARATOR = "|"
def validateSteps(self, steps):
    """Simple sanity check for a parsed list of steps.

    Returns True when `steps` is non-empty and contains at most
    `self._MAX_STEPS` entries, False otherwise.
    TODO: we could also validate against a list of possible steps.
    """
    if not steps:
        return False
    return len(steps) <= self._MAX_STEPS
27 
def __init__(self, path):
    """Read the suite log found under `path` and split it into sections.

    The log lines returned by `self.readInput(path)` are divided into
    three lists kept on the instance:
      * self.lines_general  - everything before the TimeSize tests
      * self.lines_timesize - the TimeSize tests
      * self.lines_other    - all other tests [igprof etc]
    `self.missing_fields` starts empty and is filled in while parsing.
    """
    self._MAX_STEPS = 5  # MAXIMUM NUMBER OF STEPS PER RUN (taskset relvalreport.py...)
    self._DEBUG = False

    self._path = path

    # some initialisation to speed up the other functions
    # for cmsscimark
    self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")

    # TimeSize: separator for beginning of timeSize / end of general statistics
    # (the first timestamp is the start of TimeSize)
    self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")

    # separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests
    self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")

    # Other tests:
    self._otherStart = re.compile(r"^Preparing")

    # ----- READ THE DATA -----
    lines = self.readInput(path)

    def first_match(regexp):
        """Index of the first line matched by `regexp`, or None (one pass, no list.index)."""
        for index, line in enumerate(lines):
            if regexp.match(line):
                return index
        return None

    # Split the whole file into parts.
    # Let's not assume there are ALWAYS TimeSize tests in the runs of the
    # Performance Suite, so every marker may be absent.
    timesize_end = first_match(self._timeSizeEnd)
    if timesize_end is not None:
        timesize_end_index = timesize_end
    else:
        timesize_end_index = 0

    timesize_start = first_match(self._timeSizeStart)
    general_stop = first_match(self._otherStart)
    if timesize_start is not None:
        timesize_start_index = timesize_start
        general_stop_index = timesize_start_index
    elif general_stop is not None:
        timesize_start_index = timesize_end_index + 1
        general_stop_index = general_stop
    else:
        timesize_start_index = 0
        # NOTE(review): -1 makes the general section drop the last log line;
        # kept as in the original - confirm whether that is intended.
        general_stop_index = -1

    # Not OK to use timesize_start_index for the general lines: we want to
    # cover also the case of no TimeSize tests.
    self.lines_general = lines[:general_stop_index]
    self.lines_timesize = lines[timesize_start_index:timesize_end_index + 1]
    self.lines_other = lines[timesize_end_index:]

    # a list of missing fields
    self.missing_fields = []
94 
95  @staticmethod
def isTimeStamp(line):
    """
    Tell whether the string is a timestamp in the default date format.

    Returns True for a timestamp, None otherwise.

    >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
    True
    >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")

    """
    try:
        # we use the default date format
        time.strptime(line, "%a %b %d %H:%M:%S %Y")
    except ValueError:
        return None
    else:
        return True
111 
112  @staticmethod
def findFirstIndex_ofStartsWith(job_lines, start_of_line):
    """Return the index of the first line that starts with `start_of_line`.

    Raises IndexError when no line matches (callers rely on this exception).
    The scan stops at the first hit instead of calling list.index() for
    every matching line.
    """
    for index, line in enumerate(job_lines):
        if line.startswith(start_of_line):
            return index
    raise IndexError("no line starts with %r" % (start_of_line,))
117 
def findLineBefore(self, line_index, lines, test_condition):
    """Return the closest line before `line_index` for which `test_condition` holds.

    The list is walked backwards starting at `line_index - 1`.
    Raises ValueError when no such line exists.
    """
    position = line_index - 1
    while position >= 0:
        candidate = lines[position]
        if test_condition(candidate):
            return candidate
        position -= 1
    raise ValueError
127 
128 
def findLineAfter(self, line_index, lines, test_condition, return_index = False):
    """Return the closest line after `line_index` for which `test_condition` holds.

    With `return_index` set, the index of that line is returned instead of
    its text. Returns None when no later line satisfies the condition.
    """
    position = line_index + 1
    total = len(lines)
    while position < total:
        if test_condition(lines[position]):
            return position if return_index else lines[position]
        position += 1
    return None
139 
def firstTimeStampBefore(self, line_index, lines):
    """Return the first timestamp line found BEFORE the line with the given index."""
    is_timestamp = self.isTimeStamp
    return self.findLineBefore(line_index, lines, test_condition = is_timestamp)
144 
def firstTimeStampAfter(self, line_index, lines):
    """Return the first timestamp line found AFTER the line with the given index."""
    is_timestamp = self.isTimeStamp
    return self.findLineAfter(line_index, lines, test_condition = is_timestamp)
149 
def handleParsingError(self, message):
    """Report a parsing problem.

    In debug mode (`self._DEBUG`) the problem is raised as a ValueError;
    otherwise the message is printed between two banner lines and parsing
    continues.
    """
    if self._DEBUG:
        raise ValueError(message)
    report = (
        " ======== AND ERROR WHILE PARSING METADATA ====",
        message,
        " =============== end ========================= ",
    )
    for text in report:
        print(text)
156 
157  #IgProf_Perf, IgProf_Mem, Memcheck, Callgrind
158  #TODO: divide the input using separators
159 
160  """ reads the input cmsPerfsuite.log file """
def readInput(self, path, fileName = "cmsPerfSuite.log"):
    """Read a log file and return its lines stripped of surrounding whitespace.

    path     -- directory containing the file
    fileName -- name of the file inside `path`

    Returns an empty list when the file cannot be opened or read (IOError).
    The file is opened in a `with` block so it is closed even if reading fails.
    """
    try:
        with open(os.path.join(path, fileName), "r") as log_file:
            return [line.strip() for line in log_file]
    except IOError:
        return []
171 
172 
173 
174 
def getMachineInfo(self):
    """Return the cpu and memory info read from the `cpuinfo` and `meminfo`
    files stored in `self._path`.

    We assume that:
      * num_cores = max(processor) + 1 [processors are counted from 0]
      * 'model name' is the processor type [only the first one is returned,
        we assume the others to be the same]
      * 'cpu MHz' is the speed of the CPU; the numerically largest value is
        returned, still as the string found in the file.
        TODO: cpu MHz shows the current speed, not the maximum one, e.g.
            model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
            cpu MHz    : 800.000
            cache size : 6144 KB

    Returns a dict with the keys num_cores, cpu_speed_MHZ, cpu_cache_size,
    cpu_model_name and memory_total_ram. A key is left out when its
    attribute is not present; a file that cannot be read is reported with
    a print and contributes nothing.
    """

    def read_attributes(file_name):
        """Return [(attr_name, attr_value), ...] for each 'name : value' line."""
        pairs = []
        with open(os.path.join(self._path, file_name), "r") as info_file:
            for line in info_file:
                # split on the first colon only, so values keep their own colons
                name, separator, value = line.partition(":")
                if separator:
                    pairs.append((name.strip(), value.strip()))
        return pairs

    def values_of(pairs, wanted):
        """All values recorded for the attribute `wanted`, in file order."""
        return [value for (name, value) in pairs if name == wanted]

    # cpu info
    cpu_result = {}
    try:
        cpu_attributes = read_attributes("cpuinfo")
    except IOError as e:
        print(e)
    else:
        processors = [int(value) for value in values_of(cpu_attributes, "processor")]
        if processors:
            cpu_result["num_cores"] = max(processors) + 1
        speeds = values_of(cpu_attributes, "cpu MHz")
        if speeds:
            # compare numerically: as strings "800.000" would beat "2400.000"
            cpu_result["cpu_speed_MHZ"] = max(speeds, key=float)
        cache_sizes = values_of(cpu_attributes, "cache size")
        if cache_sizes:
            cpu_result["cpu_cache_size"] = cache_sizes[0]
        model_names = values_of(cpu_attributes, "model name")
        if model_names:
            cpu_result["cpu_model_name"] = model_names[0]

    # memory info
    mem_result = {}
    try:
        mem_attributes = read_attributes("meminfo")
    except IOError as e:
        print(e)
    else:
        totals = values_of(mem_attributes, "MemTotal")
        if totals:
            mem_result["memory_total_ram"] = totals[0]

    cpu_result.update(mem_result)
    return cpu_result
232 
233 
234 
def _applyParsingRules(self, parsing_rules, lines):
    """
    Apply the provided regular expression rules to the lines through the
    shared helper `parsingRulesHelper.rulesParser` (rules are compiled there).

    Each rule is a tuple:
        ( (field_name_1_to_match, field_name_2), regular expression, /optionally: "req" if the field is required/ )
    i.e. rule[0] holds the keys under which the matched groups are stored,
    rule[1] the regular expression and the optional rule[2] marks the field
    as required.

    The missing fields reported by the helper are appended to
    `self.missing_fields`; the dictionary of parsed values is returned.
    """
    parsed = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules = True)
    info, not_found = parsed

    self.missing_fields.extend(not_found)
    return info
253 
254 
def parseGeneralInfo(self):
    """Parse the general section of the log (`self.lines_general`).

    Returns a dictionary holding the fields extracted by the parsing rules
    below plus:
      * "candles"             - all candles_* fields merged into one string
                                "Test:candle,candle|Test:..."
      * "tags"                - the lines of the tag table joined with the line separator
      * "command_line"        - the line following "Performance suite invoked with command line:"
      * "command_line_parsed" - the lines between "Initial PerfSuite Arguments:"
                                and "Running cmsDriver.py", joined with the line separator
    The two command line entries are "" when their marker lines are absent.
    """
    lines = self.lines_general
    # A simple tuple of rules: the first element names the parameters to be
    # fetched from the regexp, the second is the regexp itself, the optional
    # third ("req") marks a required field.
    # TIP: don't forget that a tuple of one ends with ,
    parsing_rules = (
        (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
        (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
        (("architecture",) ,r"""^Current Architecture is (.+)$"""),
        (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
        (("base_release_path",) , r"""^Base Release in: (.+)$"""),
        (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),

        (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),

        (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
        (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
        (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
        (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),

        (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
        (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),

        (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
        (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),

        (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
        (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),

        (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
        (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),

        (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
        # NOTE(review): this rule also says "before starting the tests" -
        # confirm against a real log whether "after" was meant.
        (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
        (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),

        (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
    )
    # apply the rules to extract the fields into the dictionary
    info = self._applyParsingRules(parsing_rules, lines)

    # postprocess the candles lists; the field names are collected first so
    # that the dictionary is not modified while it is being iterated
    candles = {}
    for field in [name for name in info if name.startswith("candles_")]:
        raw_value = info.pop(field)
        test = field.replace("candles_", "")
        candles[test] = [v.strip(" '") for v in raw_value.split(",")]
    info["candles"] = self._LINE_SEPARATOR.join([k + ":" + ",".join(v) for (k, v) in candles.items()])

    # TAGS, e.g.:
    #   --- Tag --- --- RelTag --- -------- Package --------
    #   HEAD V05-03-06 IgTools/IgProf
    #   V01-06-05 V01-06-04 Validation/Performance
    #   ---------------------------------------
    #   total packages: 2 (2 displayed)
    tags = []  # stays empty when no (complete) tag table is found
    tags_start_index = next((i for i, line in enumerate(lines) if line.startswith("--- Tag ---")), -1)
    if tags_start_index > -1:
        tags_end_index = next(
            (i for i in range(tags_start_index + 1, len(lines))
             if lines[i].startswith("---------------------------------------")),
            -1)
        if tags_end_index > -1:
            # +2 keeps the "total packages" line that follows the closing rule
            tags = lines[tags_start_index:tags_end_index + 2]
    # we join the tags with the separator to store them as a simple string
    info["tags"] = self._LINE_SEPARATOR.join(tags)

    # get the command line
    try:
        cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1  # that's the next line
        info["command_line"] = lines[cmd_index]
    except IndexError as e:
        if self._DEBUG:
            print(e)
        info["command_line"] = ""

    try:
        cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
        cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
        info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
    except IndexError as e:
        if self._DEBUG:
            print(e)
        # only the parsed arguments are missing: "command_line" must stay untouched
        info["command_line_parsed"] = ""

    return info
358 
359 
361  #make it general, for whatever test comes...
362  test = {}
363 
364  parsing_rules = (
365  (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
366  #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
367  (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
368  (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
369  # for this we cannot guarrantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
370  (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
371  #not shure if event content is required
372  (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
373  #TODO: after changeing the splitter to "taskset -c ..." this is no longer included into the part of correct job
374  #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
375  )
376 
377 
378  lines = self.lines_other
379  """
380 
381  for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file:
382  * beginning ->> and start timestamp- the firstone:
383  Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each
384  Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
385  Mon Jun 14 20:06:54 2010
386 
387  <... whatever might be here, might overlap with other test start/end messages ..>
388 
389  Mon Jun 14 21:59:33 2010
390  IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4
391 
392  * ending - the last timestamp "before is done running ...."
393  """
394  # we take the first TimeStamp after the starting message and the first before the finishing message in 2 rounds..
395 
396  #TODO: if threads would be changed it would stop working!!!
397 
398  # i.e. Memcheck, cpu, events
399  reSubmit = re.compile(r"""^Let's submit (.+) test on core (\d+)$""")
400 
401  reStart = re.compile(r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
402 
403  # i.e. Memcheck, thread name,id,core number
404  reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
405 
406  reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
407 
408  reWaiting = re.compile(r"""^Waiting for tests to be done...$""")
409 
410  reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
411  """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
412 
413  jobs = []
414 
415  #can split it into jobs ! just have to reparse it for the exit codes later....
416  for line_index in xrange(0, len(lines)):
417  line = lines[line_index]
418  if reSubmit.match(line):
419  end_index = self.findLineAfter(line_index, lines, test_condition=lambda l: reWaiting.match(l), return_index = True)
420  jobs.append(lines[line_index:end_index])
421 
422  for job_lines in jobs:
423  #print job_lines
424  info = self._applyParsingRules(parsing_rules, job_lines)
425  #Fixing here the compatibility with new cmsdriver.py --conditions option
426  #(for which now we have autoconditions and FrontierConditions_GlobalTag is optional):
427  if 'auto:' in info['conditions']:
428  from Configuration.AlCa.autoCond import autoCond
429  info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
430  else:
431  if 'FrontierConditions_GlobalTag' in info['conditions']:
432  info['conditions']=info['conditions'].split(",")[1]
433 
434  steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
435  steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
436  #probably it includes steps until we found *Candle... ?
437  steps = job_lines[steps_start + 1:steps_end]
438  if not self.validateSteps(steps):
439  self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))
440 
441  """ quite nasty - just a work around """
442  print "Trying to recover from this error in case of old cmssw"
443 
444  """ we assume that steps are between the following sentance and a TimeStamp """
445  steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
446  steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)
447 
448  steps = job_lines[steps_start + 1:steps_end]
449  if not self.validateSteps(steps):
450  self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
451  else:
452  print "RECOVERY SEEMS to be successful: %s" % str(steps)
453 
454  info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","
455 
456  start_id_index = self.findLineAfter(0, job_lines, test_condition = reStart.match, return_index = True)
457  pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
458  info["testname"] = testName
459 
460  thread_id_index = self.findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index = True)
461  info["start"] = self.firstTimeStampAfter(thread_id_index, job_lines)
462 
463  thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
464  info["thread_id"] = thread_id
465 
466  if testName not in test:
467  test[testName] = []
468  test[testName].append(info)
469 
470  for line_index in xrange(0, len(lines)):
471  line = lines[line_index]
472 
473  if reEnd.match(line):
474  testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
475  time = self.firstTimeStampBefore(line_index, lines)
476  try:
477  exit_code = ""
478  #we search for the exit code
479  line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
480  exit_code, = reExitCode.match(line_exitcode).groups()
481  except Exception as e:
482  print "Error while getting exit code (Other test): %s" + str(e)
483 
484  for key, thread in test.items():
485  for i in range(0, len(thread)):
486  if thread[i]["thread_id"] == thread_id:
487  thread[i].update({"end": time, "exit_code": exit_code})
488  break
489 
490  return test
491 
492 
def parseTimeSize(self):
    """Parse the TimeSize section of the log (`self.lines_timesize`).

    The section is divided into jobs; a job starts at each line matching
    "taskset -c <n> cmsRelvalreportInput.py" and runs up to the next such
    line (or the end of the section). Lines before the first job are
    ignored, and a section without any job yields an empty result.

    Structure of one job in the input file:
        Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        Candle MinBias will be PROCESSED
        You defined your own steps to run:
        RAW2DIGI-RECO
        *Candle MinBias
        Written out cmsRelvalreport.py input file at:
        /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
        Thu Aug 13 14:53:37 2009 [start]
        <....>
        Thu Aug 13 16:04:48 2009 [end]
        Individual cmsRelvalreport.py ExitCode 0

    Returns {"TimeSize": [info, ...]} with one dictionary per job holding
    the fields from the parsing rules plus "start", "end", "exit_code"
    and "steps".
    """
    timesize_result = []

    # divide into separate jobs (one per candle and step)
    lines = self.lines_timesize
    timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
    # Collect the job boundaries as indexes: comparing them with None-free
    # lists avoids mistaking a job that starts at index 0 for "no job yet".
    job_starts = [index for index, line in enumerate(lines)
                  if timesize_start_indicator.match(line)]
    job_ends = job_starts[1:] + [len(lines)]
    jobs = [lines[begin:end] for (begin, end) in zip(job_starts, job_ends)]

    parsing_rules = (
        (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
        #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
        (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
        # for this we cannot guarantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
        (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
        # not sure if event content is required
        (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        #TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
    )

    # On older files we have "Individual Relvalreport.py ExitCode 0"
    # instead of "Individual cmsRelvalreport.py ExitCode"
    reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")

    if self._DEBUG:
        print("TimeSize (%d) jobs: %s" % (len(jobs), str(jobs)))

    # parse each of the TimeSize jobs: find candles, etc and start-end times
    for job_lines in jobs:
        info = self._applyParsingRules(parsing_rules, job_lines)
        # Compatibility with the new cmsdriver.py --conditions option (we now
        # have autoconditions and FrontierConditions_GlobalTag is optional):
        if 'auto:' in info['conditions']:
            from Configuration.AlCa.autoCond import autoCond
            info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
        else:
            if 'FrontierConditions_GlobalTag' in info['conditions']:
                info['conditions']=info['conditions'].split(",")[1]

        # The "Written out cmsRelvalreport.py input file at:" line is not
        # available on one of the releases; the first timestamp of the job
        # is used as the starting time instead.
        info["start"] = self.firstTimeStampAfter(0, job_lines)

        # end time: the timestamp that comes before the exit code line
        end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)

        # on the same line we have the exit code - so let's get it
        nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()

        info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
        info["exit_code"] = exit_code

        steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
        steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
        # the steps are the lines between the two markers
        steps = job_lines[steps_start + 1:steps_end]
        if not self.validateSteps(steps):
            self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

            # quite nasty - just a work around
            print("Trying to recover from this error in case of old cmssw")

            # we assume that steps are between the following sentence and a timestamp
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
            steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
            else:
                print("RECOVERY SEEMS to be successful: %s" % str(steps))

        info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","

        timesize_result.append(info)
    return {"TimeSize": timesize_result}
613  #TODO:
614 
615 
616 
def readCmsScimarkTest(self, testName, testType, core):
    """Collect the composite scores found in the log file `<testName>.log`.

    Returns one dictionary per "Composite Score:" line with the keys
    "score", "type" (= testType), "core" (= core) and
    "messurement_number" (position of the score in the file, from 1).
    """
    scores = []
    for line in self.readInput(self._path, fileName = testName + ".log"):
        found = self.reCmsScimarkTest.match(line)
        if not found:
            continue
        entry = {"score": found.groups()[1], "type": testType, "core": core}
        # add the number of the measurement
        entry.update({"messurement_number": len(scores) + 1})
        scores.append(entry)
    return scores
628 
def readCmsScimark(self, main_cores = None):
    """Read the composite scores of all cmsScimark log files in `self._path`.

    main_cores -- list whose first element is the core the main benchmarks
                  ran on; defaults to [1].
                  TODO: we do not always really know the main core number,
                  but we don't care too much.

    Returns the concatenated results of `readCmsScimarkTest` for
    cmsScimark2.log, cmsScimark2_large.log and every cmsScimark_<N>.log
    file found in the directory (the "not used" cores).
    """
    if main_cores is None:
        main_cores = [1]
    main_core = main_cores[0]

    # we parse each of the SciMark files and the Composite scores
    csimark = []
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))

    # We do not always know the number of cores available, so we just search
    # the directory to find out the core numbers. The dot is escaped so that
    # only real "cmsScimark_<N>.log" names are accepted.
    reIsCsiMark_notusedcore = re.compile(r"^cmsScimark_(\d+)\.log$")
    scimark_files = []
    for file_name in os.listdir(self._path):
        found = reIsCsiMark_notusedcore.match(file_name)
        if found and os.path.isfile(os.path.join(self._path, file_name)):
            scimark_files.append(found.groups()[0])

    for core_number in scimark_files:
        try:
            csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" %str(core_number), core = core_number))
        except IOError as e:
            if self._DEBUG:
                print(e)
    return csimark
652  #print csimark
653 
655  """
656  checks if the suite has successfully finished
657  and if the tarball was successfully archived and uploaded to the castor """
658 
659  parsing_rules = (
660  (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
661  (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
662  (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
663  #TODO: WE MUST HAVE THE CASTOR URL, but for some of files it's not included [probably crashed]
664  (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
665  (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
666  )
667 
668 
669  """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
670  info = self._applyParsingRules(parsing_rules, self.lines_other)
671 
672  """ did we detect any errors in log files ? """
673  info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
674  if not info["successfully_archived_tarball"]:
675  info["castor_file_url"] = ""
676 
677  if not info["castor_file_url"]:
678  #TODO: get the castor file url or abort
679  self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
680  lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")
681 
682  url = ""
683  try:
684  #print "HERE!"
685  url=self.get_tarball_fromlog()
686  print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url
687 
688  except:
689  if "PERFDB_CASTOR_FILE_URL" in os.environ:
690  url = os.environ["PERFDB_CASTOR_FILE_URL"]
691 
692  else: #FIXME: add the possibility to get it directly from the cmsPerfSuite.log file (make sure it is dumped there before doing the tarball itself...)
693  print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"
694  self.handleParsingError( "Castor tarball URL not found. Provide interactively")
695 
696  while True:
697 
698  if lmdb_castor_url_is_valid(url):
699  info["castor_file_url"] = url
700  break
701  print "Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball"
702  if os.isatty(0): url = sys.stdin.readline()
703  else: raise IOError("stdin is closed.")
704 
705 
706  return info
708  '''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
709  print "Getting the url from the cmsPerfSuite.log"
710  log=open("cmsPerfSuite.log","r")
711  castor_dir="UNKNOWN_CASTOR_DIR"
712  tarball="UNKNOWN_TARBALL"
713  for line in log.readlines():
714  if 'castordir' in line:
715  castor_dir=line.split()[1]
716  if 'tgz' in line and tarball=="UNKNOWN_TARBALL": #Pick the first line that contains the tar command...
717  if 'tar' in line:
718  tarball=os.path.basename(line.split()[2])
719  castor_tarball=os.path.join(castor_dir,tarball)
720  return castor_tarball
721 
def parseAll(self):
    """Run every parser and gather the results in one dictionary.

    Returns a dictionary with the keys:
      * "General"           - general info (start, arguments, host etc),
                              machine info (cpu, memory) and the completion
                              info (how successful the run was, when it
                              finished and the final castor url)
      * "TestResults"       - TimeSize results and the results of the other
                              tests (IgProf, Memcheck, ...)
      * "cmsSciMark"        - the machine scimark scores
      * "unrecognized_jobs" - always an empty list here
    Errors raised while parsing the test results are printed, not raised.
    Missing required fields are reported through `handleParsingError`.
    """
    result = {"General": {}, "TestResults":{}, "cmsSciMark":{}, 'unrecognized_jobs': []}
    general = result["General"]

    # all the general info - start, arguments, host etc
    general.update(self.parseGeneralInfo())

    # machine info - cpu, memory
    general.update(self.getMachineInfo())

    # how successful the run was, when it finished and the final castor url
    general.update(self.parseTheCompletion())

    print("Parsing TimeSize runs...")
    if len(self.lines_timesize) > 0:
        try:
            result["TestResults"].update(self.parseTimeSize())
        except Exception as e:
            print("BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " + str(e))

    print("Parsing Other(IgProf, Memcheck, ...) runs...")
    try:
        result["TestResults"].update(self.parseAllOtherTests())
    except Exception as e:
        print("BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " + str(e))

    # TODO: temporarily hard-coded - search for the cores, use regexp.
    # The "run_on_cpus" general field is deliberately not read here: its
    # value was never used and its absence must not abort the parsing.
    main_cores = [1]

    # THE MACHINE SCIMARKS
    result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)

    if self.missing_fields:
        self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= "+ str(self.missing_fields))

    return result
764 
765 
766 
767 if __name__ == "__main__":
768  from xml.dom import minidom
769  import cmssw_exportdb_xml
770  #steps do not get parsed corectly
771  #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_1_0_pre7_--usersteps=RAW2DIGI-RECO_lxbuild107.cern.ch_relval/relval/CMSSW_3_1_0_pre7/work2"
772  #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI"
773  #includes finishing time, succesfully archived tarball etc
774  #path = "/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PileUp"
775  path = os.path.abspath(".") #Better to point to the local dir than to some old Vidmantas' laptop dirs ;)
776  #p = parserPerfsuiteMetadata("/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PerfsuiteRun")
778  run_info = p.parseAll()
779 
780  #print "======= GENERAL ========= "
781  #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["General"].items())
782  #print "======= Test results ========= "
783  #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["TestResults"].items())
784 
785  xml_doc = minidom.Document()
786  cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)
787  #print "General info:" + str(p.parseGeneralInfo())
788  import doctest
789  doctest.testmod()
790 
791  #print p.readCmsScimark()
792 
793 
def findLineAfter(self, line_index, lines, test_condition, return_index=False)
def readInput(self, path, fileName="cmsPerfSuite.log")
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def exportRunInfo(xml_doc, run_info, release=None, print_out=False)
def readCmsScimarkTest(self, testName, testType, core)
def findLineBefore(self, line_index, lines, test_condition)
#define update(a, b)
def rulesParser(parsing_rules, lines, compileRules=True)
#define str(s)
double split
Definition: MVATrainer.cc:139
T get(const Candidate &c)
Definition: component.h:55