CMS 3D CMS Logo

parserPerfsuiteMetadata.py
Go to the documentation of this file.
1 from __future__ import print_function
2 from __future__ import absolute_import
3 from builtins import range
4 import re
5 import os, sys
6 import time
7 from . import parsingRulesHelper
8 import glob
9 from commands import getstatusoutput
10 
12  """
13  The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
14 
15  * General info
16  As most of the info are simple one line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with tuple of field names for general info which will be filled in. in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling simple function _applyParsingRules which checks each lines with each if one passes another, if it does fills in the result dictionary with the result.
17  Additionally we get the cpu and memory info from /proc/cpuinfo and /proc/meminfo
18 
19  * TimeSize test
20  We use much the same technique here as well. But first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules, and we also find the starting and ending times (i.e. we know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:")
21 
22  * All other tests
23  We find the statement that the test is being launched (containing the test name, core and number of events). Above it we have the thread number, and below it the starting time.
24  The ending time can ONLY be connected with the starting time by the Thread-ID. The problem is that the file names the same test instance differently, like <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>.
25  """
_LINE_SEPARATOR = "|"  # separator used to join multi-valued fields (tags, candles, steps) into one string
def validateSteps(self, steps):
    """Sanity check for a parsed list of steps.

    Returns True for a non-empty list of at most self._MAX_STEPS
    entries, False otherwise.
    TODO: we could use a list of possible steps also.
    """
    if not steps:
        return False
    return len(steps) <= self._MAX_STEPS
30 
def __init__(self, path):
    """Read cmsPerfSuite.log found under `path` and split its lines into
    the three sections (general info, TimeSize, other tests) that the
    parse* methods later consume.
    """
    self._MAX_STEPS = 5  # MAXIMUM NUMBER OF STEPS PER RUN (taskset relvalreport.py...)
    self._DEBUG = False

    self._path = path

    # some initialisation to speed up the other functions
    # for cmsScimark
    self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")

    # TimeSize
    # the separator for beginning of timeSize / end of general statistics
    self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
    # (the first timestamp is the start of TimeSize)

    # the separator for end of timeSize / beginning of IgProf_Perf,
    # IgProf_Mem, Memcheck, Callgrind tests
    self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")

    # Other tests:
    self._otherStart = re.compile(r"^Preparing")

    # ----- READ THE DATA -----
    lines = self.readInput(path)

    # Split the whole file into parts.
    # TimeSize tests are not guaranteed to be present, so locate the
    # section markers defensively.
    # Fix: use enumerate instead of `lines.index(line)` - index() is
    # O(n) per call (quadratic overall) and returns the FIRST
    # occurrence of a duplicated line rather than the matched one.
    timesize_end = [i for i, line in enumerate(lines) if self._timeSizeEnd.match(line)]
    timesize_end_index = timesize_end[0] if timesize_end else 0

    timesize_start = [i for i, line in enumerate(lines) if self._timeSizeStart.match(line)]
    general_stop = [i for i, line in enumerate(lines) if self._otherStart.match(line)]
    if timesize_start:
        timesize_start_index = timesize_start[0]
        general_stop_index = timesize_start_index
    elif general_stop:
        timesize_start_index = timesize_end_index + 1
        general_stop_index = general_stop[0]
    else:
        timesize_start_index = 0
        general_stop_index = -1

    # we split the structure:
    #  * general
    #  * timesize
    #  * all others [igprof etc]
    self.lines_general = lines[:general_stop_index]
    self.lines_timesize = lines[timesize_start_index:timesize_end_index + 1]
    self.lines_other = lines[timesize_end_index:]

    # a list of missing required fields, filled in by _applyParsingRules
    self.missing_fields = []
97 
98  @staticmethod
99  def isTimeStamp(line):
100  """
101  Returns whether the string is a timestamp (if not returns None)
102 
103  >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
104  True
105  >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")
106 
107  """
108  datetime_format = "%a %b %d %H:%M:%S %Y" # we use default date format
109  try:
110  time.strptime(line, datetime_format)
111  return True
112  except ValueError:
113  return None
114 
115  @staticmethod
116  def findFirstIndex_ofStartsWith(job_lines, start_of_line):
117  return [job_lines.index(line)
118  for line in job_lines
119  if line.startswith(start_of_line)][0]
120 
def findLineBefore(self, line_index, lines, test_condition):
    """ finds a line satisfying the `test_condition` comming before the `line_index` """
    # walk backwards, starting from the line just above `line_index`
    idx = line_index - 1
    while idx >= 0:
        candidate = lines[idx]
        if test_condition(candidate):
            return candidate
        idx -= 1
    # nothing before `line_index` satisfied the condition
    raise ValueError
130 
131 
def findLineAfter(self, line_index, lines, test_condition, return_index = False):
    """ finds a line satisfying the `test_condition` comming after the `line_index` """
    # scan forward, starting just below `line_index`
    for idx in range(line_index + 1, len(lines)):
        candidate = lines[idx]
        if not test_condition(candidate):
            continue
        # NOTE: implicitly returns None when nothing matches
        return idx if return_index else candidate
142 
def firstTimeStampBefore(self, line_index, lines):
    """ returns the first timestamp BEFORE the line with given index """
    # delegate: search backwards for the first line that parses as a timestamp
    return self.findLineBefore(line_index, lines,
                               test_condition=self.isTimeStamp)
147 
def firstTimeStampAfter(self, line_index, lines):
    """ returns the first timestamp AFTER the line with given index """
    # delegate: search forwards for the first line that parses as a timestamp
    return self.findLineAfter(line_index, lines,
                              test_condition=self.isTimeStamp)
152 
def handleParsingError(self, message):
    """Report a metadata-parsing problem.

    In debug mode the problem is fatal (ValueError); otherwise it is
    only printed and parsing continues best-effort.
    """
    if self._DEBUG:
        raise ValueError(message)
    # banner text kept byte-identical (typo included) so existing log
    # scrapers keep matching it
    banner = (" ======== AND ERROR WHILE PARSING METADATA ====",
              message,
              " =============== end ========================= ")
    for text in banner:
        print(text)
159 
#IgProf_Perf, IgProf_Mem, Memcheck, Callgrind
#TODO: divide the input using separators

def readInput(self, path, fileName = "cmsPerfSuite.log"):
    """Read `fileName` inside directory `path` and return its lines,
    stripped of surrounding whitespace.

    Returns [] when the file cannot be opened (a missing log simply
    means there is nothing to parse).
    """
    try:
        # the with-statement guarantees the file handle is closed even
        # if readlines() raises (the old code leaked it in that case)
        with open(os.path.join(path, fileName), "r") as f:
            lines = [s.strip() for s in f.readlines()]
    except IOError:
        lines = []
    return lines
174 
175 
176 
177 
def getMachineInfo(self):
    """ Returns the cpu and memory info """

    # --- cpu info ---
    #
    # we assume that:
    #  * num_cores = max(processor id + 1)  [counted from 0]
    #  * 'model name' is the processor type [only the first one is
    #    returned - we assume the others to be the same]
    #  * 'cpu MHz' is the speed of the CPU
    #
    # TODO: BUT 'cpu MHz' shows the current - not the maximum - speed,
    # e.g. for
    #   model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
    #   cpu MHz : 800.000
    #   cache size : 6144 KB
    cpu_result = {}
    try:
        # with-statement closes the file even on error; split(":", 1)
        # keeps values that themselves contain a colon intact
        with open(os.path.join(self._path, "cpuinfo"), "r") as f:
            # we split data into a list of pairs [(attr_name, attr_value), ...]
            cpu_attributes = [l.strip().split(":", 1) for l in f.readlines()]
        cpu_result = {
            # "processor" ids are counted from 0, hence the +1
            # (an earlier version mistakenly used "core id")
            "num_cores": max([int(attr[1].strip()) + 1 for attr in cpu_attributes if attr[0].strip() == "processor"]),
            "cpu_speed_MHZ": max([attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cpu MHz"]),
            "cpu_cache_size": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cache size"][0],
            "cpu_model_name": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "model name"][0]
        }
    except IOError as e:
        # missing cpuinfo file is tolerated; best-effort result
        print(e)

    # --- memory info ---
    mem_result = {}
    try:
        with open(os.path.join(self._path, "meminfo"), "r") as f:
            # we split data into a list of pairs [(attr_name, attr_value), ...]
            mem_attributes = [l.strip().split(":", 1) for l in f.readlines()]
        mem_result = {
            "memory_total_ram": [attr[1].strip() for attr in mem_attributes if attr[0].strip() == "MemTotal"][0]
        }
    except IOError as e:
        print(e)

    # merge both dictionaries into one flat result
    cpu_result.update(mem_result)
    return cpu_result
235 
236 
237 
def _applyParsingRules(self, parsing_rules, lines):
    """
    Applies the (provided) regular expression rules (=rule[1] for rule in parsing_rules)
    to each line and if it matches the line,
    puts the mached information to the dictionary as the specified keys (=rule[0]) which is later returned
    Rule[3] contains whether the field is required to be found. If so and it isn't found the exception would be raised.
    rules = [
      ( (field_name_1_to_match, field_name_2), regular expression, /optionaly: is the field required? if so "req"/ )
    ]
    """
    # delegate the actual matching to the shared helper module;
    # compileRules=True makes the helper compile the regexp strings itself
    (info, missing_fields) = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules = True)
    # remember which required fields were absent so parseAll() can
    # report them all at the end of the run
    self.missing_fields.extend(missing_fields)
    return info
256 
257 
def parseGeneralInfo(self):
    """Parse the general-information section of the log.

    Applies line-by-line regexp rules to self.lines_general, then
    post-processes the candle lists, the CVS tag table and the
    command-line echoes into single string fields.
    """
    lines = self.lines_general
    # A simple tuple of parsing rules: the first element names the
    # regexp groups to store, the second is the regexp itself, the
    # optional third marks the field as required.
    # TIP: don't forget that a tuple of one ends with ,
    parsing_rules = (
        (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
        (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
        (("architecture",) ,r"""^Current Architecture is (.+)$"""),
        (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
        (("base_release_path",) , r"""^Base Release in: (.+)$"""),
        (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),

        (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),

        (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
        (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
        (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
        (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),

        (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
        (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),

        (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
        (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),

        (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
        (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),

        (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
        (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),

        (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
        (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
        (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),

        (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
    )
    # apply the defined parsing rules to extract the required fields of
    # information into the dictionary (as defined in parsing rules)
    info = self._applyParsingRules(parsing_rules, lines)

    # --- postprocess the candles list ---
    candles = {}
    # iterate over a snapshot: we delete keys from `info` inside the
    # loop, and mutating a dict while iterating it raises RuntimeError
    # on Python 3
    for field, value in list(info.items()):
        if field.startswith("candles_"):
            test = field.replace("candles_", "")
            value = [v.strip(" '") for v in value.split(",")]
            candles[test] = value
            del info[field]
    info["candles"] = self._LINE_SEPARATOR.join(
        [k + ":" + ",".join(v) for (k, v) in candles.items()])

    # --- TAGS ---
    # the tag table in the log looks like:
    #  --- Tag ---    --- RelTag --- -------- Package --------
    #  HEAD           V05-03-06      IgTools/IgProf
    #  V01-06-05      V01-06-04      Validation/Performance
    #  ---------------------------------------
    #  total packages: 2 (2 displayed)
    tags_start_index = -1  # default: table not found
    try:
        tags_start_index = [i for i in range(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
    except IndexError:
        # no tag table in this log; keep the -1 default
        pass
    if tags_start_index > -1:
        tags_end_index = [i for i in range(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
        # +2 keeps the "total packages" summary line as well
        tags = lines[tags_start_index:tags_end_index + 2]
    else:  # no tags found, make an empty list ...
        tags = []
    # we join the tags with separator to store as simple string
    info["tags"] = self._LINE_SEPARATOR.join(tags)

    # --- get the command line ---
    try:
        # the command line itself is on the line AFTER the marker
        cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1
        info["command_line"] = lines[cmd_index]
    except IndexError as e:
        if self._DEBUG:
            print(e)
        info["command_line"] = ""

    try:
        cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
        cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
        info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
    except IndexError as e:
        if self._DEBUG:
            print(e)
        # bug fix: this fallback used to (re)set "command_line"
        # instead of "command_line_parsed"
        info["command_line_parsed"] = ""

    return info
361 
362 
# NOTE(review): this is the body of parseAllOtherTests(self); the "def"
# line itself (embedded line 363) is missing from this extract.
#make it general, for whatever test comes...
test = {}

parsing_rules = (
    (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
    #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
    (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
    # for this we cannot guarrantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
    (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
    #not shure if event content is required
    (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
    #TODO: after changeing the splitter to "taskset -c ..." this is no longer included into the part of correct job
    #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
)

lines = self.lines_other
# For each of the IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests the
# input has this structure:
#  * beginning ->> and start timestamp - the first one:
#       Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each
#       Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
#       Mon Jun 14 20:06:54 2010
#
#       <... whatever might be here, might overlap with other test start/end messages ..>
#
#       Mon Jun 14 21:59:33 2010
#       IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4
#  * ending - the last timestamp "before is done running ...."
# We take the first TimeStamp after the starting message and the first
# one before the finishing message, in 2 passes.
#TODO: if threads would be changed it would stop working!!!

# i.e. Memcheck, cpu, events
reSubmit = re.compile(r"""^Let's submit (.+) test on core (\d+)$""")

reStart = re.compile(r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")

# i.e. Memcheck, thread name,id,core number
reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")

reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")

reWaiting = re.compile(r"""^Waiting for tests to be done...$""")

reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
# we search for lines being either: (it's a little pascal'ish but we need the index!)

jobs = []

# split into jobs: a job spans from its "Let's submit ..." line to the
# next "Waiting for tests to be done..." line; exit codes are reparsed later
for line_index in range(0, len(lines)):
    line = lines[line_index]
    if reSubmit.match(line):
        end_index = self.findLineAfter(line_index, lines, test_condition=lambda l: reWaiting.match(l), return_index = True)
        jobs.append(lines[line_index:end_index])

for job_lines in jobs:
    # extract candle/conditions/pileup/event-content via the rules above
    info = self._applyParsingRules(parsing_rules, job_lines)
    #Fixing here the compatibility with new cmsdriver.py --conditions option
    #(for which now we have autoconditions and FrontierConditions_GlobalTag is optional):
    if 'auto:' in info['conditions']:
        from Configuration.AlCa.autoCond import autoCond
        info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
    else:
        if 'FrontierConditions_GlobalTag' in info['conditions']:
            info['conditions']=info['conditions'].split(",")[1]

    # the steps list sits between these two marker lines
    steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
    steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
    #probably it includes steps until we found *Candle... ?
    steps = job_lines[steps_start + 1:steps_end]
    if not self.validateSteps(steps):
        self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

        # quite nasty - just a work around for old cmssw logs
        print("Trying to recover from this error in case of old cmssw")

        # we assume that steps are between the following sentence and a TimeStamp
        steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
        steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

        steps = job_lines[steps_start + 1:steps_end]
        if not self.validateSteps(steps):
            self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
        else:
            print("RECOVERY SEEMS to be successful: %s" % str(steps))

    info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","

    # the "Launching ..." line gives the test name; the timestamp right
    # after the "Adding thread ..." line is the start time
    start_id_index = self.findLineAfter(0, job_lines, test_condition = reStart.match, return_index = True)
    pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
    info["testname"] = testName

    thread_id_index = self.findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index = True)
    info["start"] = self.firstTimeStampAfter(thread_id_index, job_lines)

    # the thread id is the only link between a job's start and end messages
    thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
    info["thread_id"] = thread_id

    if testName not in test:
        test[testName] = []
    test[testName].append(info)

# second pass: match each "... is done running ..." line back to its job
# by thread id, and attach end time and exit code
for line_index in range(0, len(lines)):
    line = lines[line_index]

    if reEnd.match(line):
        testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
        # NOTE(review): `time` here shadows the imported time module
        # (harmless within this method, but easy to trip over)
        time = self.firstTimeStampBefore(line_index, lines)
        try:
            exit_code = ""
            #we search for the exit code
            line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
            exit_code, = reExitCode.match(line_exitcode).groups()
        except Exception as e:
            # NOTE(review): "%s" + str(e) CONCATENATES instead of
            # formatting - the message ends up as "...%s<error text>";
            # probably % was intended
            print("Error while getting exit code (Other test): %s" + str(e))

        for key, thread in test.items():
            for i in range(0, len(thread)):
                if thread[i]["thread_id"] == thread_id:
                    thread[i].update({"end": time, "exit_code": exit_code})
                    break

return test
494 
495 
def parseTimeSize(self):
    """ parses the timeSize """
    timesize_result = []

    # TODO: we will use the first timestamp after the "or these tests will use user input file..."
    #TODO: do we have to save the name of input file somewhere?
    #
    # The structure of the input for one TimeSize job:
    #  * beginning ->> and start timestamp - the first one:
    #       >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
    #       <...>
    #       Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    #       Candle MinBias will be PROCESSED
    #       You defined your own steps to run:
    #       RAW2DIGI-RECO
    #       *Candle MinBias
    #       Written out cmsRelvalreport.py input file at:
    #       /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
    #       Thu Aug 13 14:53:37 2009 [start]
    #       <....>
    #       Thu Aug 13 16:04:48 2009 [end]
    #       Individual cmsRelvalreport.py ExitCode 0
    #  * ending - the last timestamp "... ExitCode ...."
    #
    #TODO: do we need the cmsDriver --conditions? I suppose it would be global per work directory = 1 perfsuite run (so same for all candles in one work dir)
    # TODO: which candle definition to use?

    # divide into separate jobs: each starts with a
    # "taskset -c <core> cmsRelvalreportInput.py" line
    lines = self.lines_timesize
    jobs = []
    start = None  # index of the current job's first line (None = none seen yet)
    timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
    for line_index in range(0, len(lines)):
        line = lines[line_index]
        # search for start of each TimeSize job (with a certain candle and step)
        if timesize_start_indicator.match(line):
            # bug fix: the previous `if start:` test treated a job that
            # starts at line 0 as "no job yet" (0 is falsy) and dropped it
            if start is not None:
                jobs.append(lines[start:line_index])
            start = line_index
    #add the last one
    jobs.append(lines[start:len(lines)])
    #print "\n".join(str(i) for i in jobs)

    parsing_rules = (
        (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
        #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
        (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
        # for this we cannot guarrantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
        (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
        #not shure if event content is required
        (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        #TODO: after changeing the splitter to "taskset -c ..." this is no longer included into the part of correct job
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
    )

    #parse each of the TimeSize jobs: find candles, etc and start-end times

    # On older files we have "Individual Relvalreport.py ExitCode 0"
    # instead of "Individual cmsRelvalreport.py ExitCode", hence the
    # permissive first group
    reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")

    if self._DEBUG:
        print("TimeSize (%d) jobs: %s" % (len(jobs), str(jobs)))

    for job_lines in jobs:
        # apply the defined parsing rules to extract the required fields
        info = self._applyParsingRules(parsing_rules, job_lines)
        #Fixing here the compatibility with new cmsdriver.py --conditions option
        #(for which now we have autoconditions and FrontierConditions_GlobalTag is optional):
        if 'auto:' in info['conditions']:
            from Configuration.AlCa.autoCond import autoCond
            info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
        else:
            if 'FrontierConditions_GlobalTag' in info['conditions']:
                info['conditions']=info['conditions'].split(",")[1]

        # start time: the marker line is not present on all releases, so
        # simply use the first timestamp of the job
        info["start"] = self.firstTimeStampAfter(0, job_lines)

        #TODO: improve in future (in case of some changes) we could use findBefore instead which uses the regexp as parameter for searching
        # end time: the timestamp right before the ExitCode line
        end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)

        # on the same line we have the exit Code - so let's get it
        nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()

        info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
        info["exit_code"] = exit_code

        # the steps list sits between these two marker lines
        steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
        steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
        #probably it includes steps until we found *Candle... ?
        steps = job_lines[steps_start + 1:steps_end]
        if not self.validateSteps(steps):
            self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

            # quite nasty - just a work around for old cmssw logs
            print("Trying to recover from this error in case of old cmssw")

            # we assume that steps are between the following sentence and a TimeStamp
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
            steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
            else:
                print("RECOVERY SEEMS to be successful: %s" % str(steps))

        info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","

        timesize_result.append(info)
    return {"TimeSize": timesize_result}
616  #TODO:
617 
618 
619 
def readCmsScimarkTest(self, testName, testType, core):
    """Parse <testName>.log in the work directory and return its
    cmsScimark composite scores.

    Each score is a dict holding the score value (as a string), the
    test type, the core it ran on and a 1-based measurement number.
    (The "messurement_number" key spelling is kept for compatibility
    with downstream consumers.)
    """
    lines = self.readInput(self._path, fileName = testName + ".log")
    scores = []
    for line in lines:
        # match each line only once (the old code ran the regexp twice)
        found = self.reCmsScimarkTest.match(line)
        if found:
            scores.append({"score": found.groups()[1],
                           "type": testType,
                           "core": core})
    # number the measurements, starting from 1
    for i, score in enumerate(scores, 1):
        score.update({"messurement_number": i})
    return scores
631 
def readCmsScimark(self, main_cores = [1]):
    """Collect all cmsScimark scores: the main-core benchmarks plus any
    per-core log files found in the work directory.

    NOTE: the mutable default argument is kept for interface
    compatibility; it is never mutated here.
    """
    main_core = main_cores[0]
    #TODO: WE DO NOT ALWAYS REALLY KNOW THE MAIN CORE NUMBER! but we don't care too much
    #we parse each of the SciMark files and the Composite scores
    csimark = []
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))

    # we do not always know the number of cores available, so scan the
    # directory for per-core log files instead.
    # Fix: raw string + escaped dot - the old pattern "^cmsScimark_(\d+).log$"
    # let the unescaped '.' match any character (e.g. "cmsScimark_3xlog")
    reIsCsiMark_notusedcore = re.compile(r"^cmsScimark_(\d+)\.log$")
    scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
                     for f in os.listdir(self._path)
                     if reIsCsiMark_notusedcore.match(f)
                     and os.path.isfile(os.path.join(self._path, f))]

    for core_number in scimark_files:
        try:
            csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" % str(core_number), core = core_number))
        except IOError as e:
            # a missing per-core log is tolerated; report only in debug mode
            if self._DEBUG:
                print(e)
    return csimark
655  #print csimark
656 
# NOTE(review): this is the body of parseTheCompletion(self); the "def"
# line itself (embedded line 657) is missing from this extract.
"""
checks if the suite has successfully finished
and if the tarball was successfully archived and uploaded to the castor """

parsing_rules = (
    (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
    (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
    (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
    #TODO: WE MUST HAVE THE CASTOR URL, but for some of files it's not included [probably crashed]
    (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
    (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
)

# apply the defined parsing rules to extract the required fields of
# information into the dictionary (as defined in parsing rules)
# NOTE(review): the code below indexes info["successfully_archived_tarball"]
# and info["castor_file_url"] unconditionally - presumably rulesParser
# fills defaults for unmatched fields; verify in parsingRulesHelper
info = self._applyParsingRules(parsing_rules, self.lines_other)

# did we detect any errors in log files ?
# (old-style "cond and a or b" idiom: yields "1" when the marker line exists)
info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
if not info["successfully_archived_tarball"]:
    info["castor_file_url"] = ""

if not info["castor_file_url"]:
    #TODO: get the castor file url or abort
    self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
    # a CASTOR url is considered valid if it points under /castor/
    lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")

    url = ""
    try:
        # fallback 1: re-parse cmsPerfSuite.log for the tarball location
        url=self.get_tarball_fromlog()
        print("Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url)

    except:
        # fallback 2: environment variable
        if "PERFDB_CASTOR_FILE_URL" in os.environ:
            url = os.environ["PERFDB_CASTOR_FILE_URL"]

        else: #FIXME: add the possibility to get it directly from the cmsPerfSuite.log file (make sure it is dumped there before doing the tarball itself...)
            print("Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL")
            self.handleParsingError( "Castor tarball URL not found. Provide interactively")

    # fallback 3: ask interactively until a valid url is supplied
    while True:

        if lmdb_castor_url_is_valid(url):
            info["castor_file_url"] = url
            break
        print("Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball")
        if os.isatty(0): url = sys.stdin.readline()
        else: raise IOError("stdin is closed.")


return info
# NOTE(review): this is the body of get_tarball_fromlog(self); the "def"
# line itself (embedded line 710) is missing from this extract.
'''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
print("Getting the url from the cmsPerfSuite.log")
# NOTE(review): reads from the current working directory (not self._path),
# and the file handle is never closed - flagged for a future fix
log=open("cmsPerfSuite.log","r")
castor_dir="UNKNOWN_CASTOR_DIR"
tarball="UNKNOWN_TARBALL"
for line in log.readlines():
    if 'castordir' in line:
        castor_dir=line.split()[1]
    if 'tgz' in line and tarball=="UNKNOWN_TARBALL": #Pick the first line that contains the tar command...
        if 'tar' in line:
            tarball=os.path.basename(line.split()[2])
castor_tarball=os.path.join(castor_dir,tarball)
return castor_tarball
724 
def parseAll(self):
    """Run every parser and assemble the final result dictionary with
    the keys "General", "TestResults", "cmsSciMark" and
    "unrecognized_jobs".
    """
    result = {"General": {}, "TestResults": {}, "cmsSciMark": {}, 'unrecognized_jobs': []}

    # all the general info - start, arguments, host etc
    result["General"].update(self.parseGeneralInfo())

    # machine info - cpu, memory
    result["General"].update(self.getMachineInfo())

    # how successful the run was, when it finished, final castor url
    result["General"].update(self.parseTheCompletion())

    print("Parsing TimeSize runs...")
    if len(self.lines_timesize) > 0:
        try:
            result["TestResults"].update(self.parseTimeSize())
        except Exception as e:
            print("BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " + str(e))

    print("Parsing Other(IgProf, Memcheck, ...) runs...")
    try:
        result["TestResults"].update(self.parseAllOtherTests())
    except Exception as e:
        print("BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " + str(e))

    main_cores = [result["General"]["run_on_cpus"]]
    #TODO: temporarily - search for cores, use regexp
    # (the value above is overridden until the real core detection exists;
    #  the dead `num_cores = result["General"].get(...)` lookup was removed)
    main_cores = [1]

    # THE MACHINE SCIMARKS
    result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)

    if self.missing_fields:
        self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= " + str(self.missing_fields))

    return result
767 
768 
769 
if __name__ == "__main__":
    # ad-hoc smoke test: parse the perfsuite logs found in the current
    # directory and dump the result as XML
    from xml.dom import minidom
    from . import cmssw_exportdb_xml
    #steps do not get parsed corectly
    #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_1_0_pre7_--usersteps=RAW2DIGI-RECO_lxbuild107.cern.ch_relval/relval/CMSSW_3_1_0_pre7/work2"
    #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI"
    #includes finishing time, succesfully archived tarball etc
    #path = "/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PileUp"
    path = os.path.abspath(".") #Better to point to the local dir than to some old Vidmantas' laptop dirs ;)
    # NOTE(review): the line constructing `p` (presumably
    # p = parserPerfsuiteMetadata(path), embedded line 780) is missing
    # from this extract; `p` is used below.
    #p = parserPerfsuiteMetadata("/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PerfsuiteRun")
    run_info = p.parseAll()

    #print "======= GENERAL ========= "
    #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["General"].items())
    #print "======= Test results ========= "
    #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["TestResults"].items())

    xml_doc = minidom.Document()
    cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)
    #print "General info:" + str(p.parseGeneralInfo())
    # run the doctests embedded in this module (e.g. isTimeStamp)
    import doctest
    doctest.testmod()

    #print p.readCmsScimark()
796 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def findLineAfter(self, line_index, lines, test_condition, return_index=False)
def readInput(self, path, fileName="cmsPerfSuite.log")
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def exportRunInfo(xml_doc, run_info, release=None, print_out=False)
def readCmsScimarkTest(self, testName, testType, core)
def findLineBefore(self, line_index, lines, test_condition)
#define update(a, b)
def rulesParser(parsing_rules, lines, compileRules=True)
#define str(s)
double split
Definition: MVATrainer.cc:139
T get(const Candidate &c)
Definition: component.h:55