CMS 3D CMS Logo

parserPerfsuiteMetadata.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import re
3 import os, sys
4 import time
5 import parsingRulesHelper
6 import glob
7 from commands import getstatusoutput
8 
10  """
11  The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
12 
13  * General info
14  As most of the info consists of simple one-line strings, we define regular expressions that match each of those lines. Each regular expression is associated with the data we can get from it, e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line stating when the suite started and on which machine. It is associated with a tuple of field names for the general info which will be filled in; in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling the simple function _applyParsingRules, which checks each line against each rule and, when a rule matches, fills in the result dictionary with the matched values.
15  Additionally we get the cpu and memory info from /proc/cpuinfo /proc/meminfo
16 
17  * TimeSize test
18  We use much the same technique here. First we divide the TimeSize lines by job (an individual run of cmssw - per candle, with or without pileup). Then for each job we apply our parsing rules and find the starting and ending times (i.e. we know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:")
19 
20  * All other tests
21  We find the line stating that the test is being launched (containing the test name, core and number of events). Above we have the thread number, and below the starting time.
22  The ending time can ONLY be connected with the starting time by the Thread-ID. The problem is that the log names the same test instance differently, e.g. <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>.
23  """
# Separator used when joining multi-valued fields (candles, tags, steps, parsed command line) into a single string.
24  _LINE_SEPARATOR = "|"
def validateSteps(self, steps):
    """Simple sanity check for a parsed list of steps.

    Returns True when `steps` is non-empty and holds at most
    `self._MAX_STEPS` entries, False otherwise.
    TODO: we could also check against a list of possible steps.
    """
    if not steps:
        return False
    return len(steps) <= self._MAX_STEPS
28 
def __init__(self, path):
    """Read the suite log found in `path` and split it into three sections.

    After construction the instance exposes:
      * self.lines_general  - lines before the TimeSize section (or before the
        first line starting with "Preparing" when no TimeSize start is found)
      * self.lines_timesize - lines of the TimeSize section
      * self.lines_other    - lines from the end-of-TimeSize marker onwards
      * self.missing_fields - list that collects fields reported missing
        while parsing (filled in by _applyParsingRules)

    `path` is the directory handed to self.readInput().
    """
    self._MAX_STEPS = 5  # MAXIMUM NUMBER OF STEPS PER RUN (taskset relvalreport.py...)
    self._DEBUG = False

    self._path = path

    # some initialisation to speed up the other functions
    # for cmsscimark
    self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")

    # TimeSize: the separator for beginning of timeSize / end of general statistics
    # (the first timestamp is the start of TimeSize)
    self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")

    # the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests
    self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")

    # Other tests:
    self._otherStart = re.compile(r"^Preparing")

    # ----- READ THE DATA -----
    lines = self.readInput(path)

    def first_match(pattern):
        """Index of the first line matched by `pattern`, or None (single pass)."""
        for index, line in enumerate(lines):
            if pattern.match(line):
                return index
        return None

    # Split the whole file into parts.
    # Let's not assume there are ALWAYS TimeSize tests in the runs of the Performance Suite!
    # FIXME: protection against missing TimeSize tests is still needed for all the IB tests.
    timesize_end = first_match(self._timeSizeEnd)
    timesize_end_index = 0 if timesize_end is None else timesize_end

    timesize_start = first_match(self._timeSizeStart)
    general_stop = first_match(self._otherStart)
    if timesize_start is not None:
        timesize_start_index = timesize_start
        general_stop_index = timesize_start
    elif general_stop is not None:
        timesize_start_index = timesize_end_index + 1
        general_stop_index = general_stop
    else:
        timesize_start_index = 0
        # NOTE(review): -1 makes the general section drop the last line of the
        # log; kept as in the original - confirm whether that is intended.
        general_stop_index = -1

    # We split the structure into: general / timesize / all others [igprof etc].
    # Not OK to use timesize_start_index for the general lines: we want to stay
    # general, also for the case of no TimeSize tests.
    self.lines_general = lines[:general_stop_index]
    self.lines_timesize = lines[timesize_start_index:timesize_end_index + 1]
    self.lines_other = lines[timesize_end_index:]

    # a list of missing fields
    self.missing_fields = []
95 
96  @staticmethod
def isTimeStamp(line):
    """
    Tell whether `line` is a timestamp in the default date format
    ("%a %b %d %H:%M:%S %Y"). Returns True if it is, None otherwise.

    >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
    True
    >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")

    """
    try:
        # strptime raises ValueError for anything that does not fit the format
        time.strptime(line, "%a %b %d %H:%M:%S %Y")
    except ValueError:
        return None
    return True
112 
113  @staticmethod
def findFirstIndex_ofStartsWith(job_lines, start_of_line):
    """Return the index of the first line in `job_lines` starting with `start_of_line`.

    Raises IndexError when no line matches (callers rely on that exception type).
    The list is scanned once and the scan stops at the first hit.
    """
    for index, line in enumerate(job_lines):
        if line.startswith(start_of_line):
            return index
    raise IndexError("no line starts with %r" % (start_of_line,))
118 
def findLineBefore(self, line_index, lines, test_condition):
    """Find the closest line before `line_index` that satisfies `test_condition`.

    `test_condition` is called with each candidate line, walking backwards
    from `line_index - 1` down to 0; the first line for which it returns a
    truthy value is returned. Raises ValueError when no such line exists.
    """
    # range() instead of the Python-2-only xrange(); a separate loop name keeps
    # the `line_index` argument intact for the error message.
    for index in range(line_index - 1, -1, -1):
        candidate = lines[index]
        if test_condition(candidate):
            return candidate
    raise ValueError("no line before index %s satisfies the condition" % (line_index,))
128 
129 
def findLineAfter(self, line_index, lines, test_condition, return_index = False):
    """Find the first line after `line_index` that satisfies `test_condition`.

    Walks forward from `line_index + 1` to the end of `lines`. Returns the
    matching line, or its index when `return_index` is true. Returns None
    when no line matches.
    """
    # range() instead of the Python-2-only xrange(); a separate loop name avoids
    # shadowing the `line_index` argument.
    for index in range(line_index + 1, len(lines)):
        candidate = lines[index]
        if test_condition(candidate):
            return index if return_index else candidate
    return None
140 
def firstTimeStampBefore(self, line_index, lines):
    """Return the closest timestamp line found BEFORE the given index.

    Delegates to findLineBefore with isTimeStamp as the test.
    """
    is_timestamp = self.isTimeStamp
    return self.findLineBefore(line_index, lines, test_condition=is_timestamp)
145 
def firstTimeStampAfter(self, line_index, lines):
    """Return the first timestamp line found AFTER the given index.

    Delegates to findLineAfter with isTimeStamp as the test.
    """
    is_timestamp = self.isTimeStamp
    return self.findLineAfter(line_index, lines, test_condition=is_timestamp)
150 
def handleParsingError(self, message):
    """Report a parsing problem.

    In debug mode (self._DEBUG) the problem is raised as a ValueError;
    otherwise it is printed between two banner lines and parsing goes on.
    """
    if self._DEBUG:
        raise ValueError(message)
    banner_top = " ======== AND ERROR WHILE PARSING METADATA ===="
    banner_bottom = " =============== end ========================= "
    for text in (banner_top, message, banner_bottom):
        print(text)
157 
158  #IgProf_Perf, IgProf_Mem, Memcheck, Callgrind
159  #TODO: divide the input using separators
160 
161  """ reads the input cmsPerfsuite.log file """
def readInput(self, path, fileName = "cmsPerfSuite.log"):
    """Read `fileName` from directory `path` and return its lines, stripped.

    Returns an empty list when the file cannot be opened or read (IOError).
    """
    try:
        # the context manager closes the file even if reading fails half-way
        with open(os.path.join(path, fileName), "r") as f:
            return [s.strip() for s in f]
    except IOError:
        return []
172 
173 
174 
175 
def getMachineInfo(self):
    """Return the cpu and memory info read from `cpuinfo` / `meminfo` in self._path.

    The returned dictionary contains (when the files could be parsed):
      * num_cores        - max("processor" value) + 1 (processors are counted from 0)
      * cpu_speed_MHZ    - the numerically largest "cpu MHz" value, as the original string
      * cpu_cache_size   - first "cache size" entry
      * cpu_model_name   - first "model name" entry (others are assumed to be the same)
      * memory_total_ram - first "MemTotal" entry

    A missing, unreadable or incomplete file is reported with print() and its
    keys are simply left out of the result.

    Example of the cpuinfo lines used:
        model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
        cpu MHz : 800.000
        cache size : 6144 KB
    """
    #TODO: BUT cpu MHz show not the maximum speed but current

    def read_attributes(file_name):
        """Parse `name : value` lines into a list of (name, value) tuples."""
        with open(os.path.join(self._path, file_name), "r") as f:
            attributes = []
            for raw in f:
                # partition keeps values that themselves contain ':' intact
                name, _, value = raw.partition(":")
                attributes.append((name.strip(), value.strip()))
            return attributes

    def values_of(attributes, wanted):
        return [value for (name, value) in attributes if name == wanted]

    cpu_result = {}
    try:
        cpu_attributes = read_attributes("cpuinfo")
        cpu_result = {
            "num_cores": max(int(v) + 1 for v in values_of(cpu_attributes, "processor")),  # Bug... Vidmantas used "core id"
            # compare numerically: as strings "800.000" would sort above "2400.000"
            "cpu_speed_MHZ": max(values_of(cpu_attributes, "cpu MHz"), key=float),
            "cpu_cache_size": values_of(cpu_attributes, "cache size")[0],
            "cpu_model_name": values_of(cpu_attributes, "model name")[0],
        }
    except (IOError, ValueError, IndexError) as e:
        # ValueError / IndexError: the file exists but lacks the expected entries
        print(e)

    mem_result = {}
    try:
        mem_attributes = read_attributes("meminfo")
        mem_result = {
            "memory_total_ram": values_of(mem_attributes, "MemTotal")[0],
        }
    except (IOError, IndexError) as e:
        print(e)

    cpu_result.update(mem_result)
    return cpu_result
233 
234 
235 
def _applyParsingRules(self, parsing_rules, lines):
    """
    Run the shared parsing helper over `lines` using `parsing_rules` and
    return the dictionary of extracted fields.

    Each rule has the form
        ( (field_name_1, field_name_2, ...), regular expression [, "req"] )
    i.e. the field names (rule[0]) to be filled from the groups of the
    regular expression (rule[1]); the optional third element marks the
    field as required.

    Fields the helper reports as missing are appended to self.missing_fields.
    """
    # the rules are handed over uncompiled; the helper compiles them
    parsed = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules = True)
    info, missing_fields = parsed
    self.missing_fields.extend(missing_fields)
    return info
254 
255 
256  def parseGeneralInfo(self):
257  lines = self.lines_general
258  """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the
259  regexp while the second one is the regexp itself """
260  #TIP: don't forget that tuple of one ends with ,
261  parsing_rules = (
262  (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
263  (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
264  (("architecture",) ,r"""^Current Architecture is (.+)$"""),
265  (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
266  (("base_release_path",) , r"""^Base Release in: (.+)$"""),
267  (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),
268 
269  (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),
270 
271  (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
272  (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
273  (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
274  (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),
275 
276  (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
277  (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),
278 
279  (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
280  (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),
281 
282  (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
283  (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),
284 
285  (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
286  (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),
287 
288 
289  (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
290  (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
291  (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),
292 
293  (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
294 
295 
296  )
297  """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
298  info = self._applyParsingRules(parsing_rules, lines)
299 
300 
301  """ postprocess the candles list """
302  candles = {}
303  for field, value in info.items():
304  if field.startswith("candles_"):
305  test = field.replace("candles_", "")
306  value = [v.strip(" '") for v in value.split(",")]
307  #if value:
308  candles[test]=value
309  del info[field]
310  #print candles
311  info["candles"] = self._LINE_SEPARATOR.join([k+":"+",".join(v) for (k, v) in candles.items()])
312 
313 
314  """ TAGS """
315  """
316  --- Tag --- --- RelTag --- -------- Package --------
317  HEAD V05-03-06 IgTools/IgProf
318  V01-06-05 V01-06-04 Validation/Performance
319  ---------------------------------------
320  total packages: 2 (2 displayed)
321  """
322  tags_start_index = -1 # set some default
323  try:
324  tags_start_index = [i for i in xrange(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
325  except:
326  pass
327  if tags_start_index > -1:
328  tags_end_index = [i for i in xrange(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
329  # print "tags start index: %s, end index: %s" % (tags_start_index, tags_end_index)
330  tags = lines[tags_start_index:tags_end_index+2]
331  # print [tag.split(" ") for tag in tags]
332  # print "\n".join(tags)
333  else: # no tags found, make an empty list ...
334  tags = []
335  """ we join the tags with separator to store as simple string """
336  info["tags"] = self._LINE_SEPARATOR.join(tags)
337  #FILES/PATHS
338 
339 
340  """ get the command line """
341  try:
342  cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1 #that's the next line
343  info["command_line"] = lines[cmd_index]
344  except IndexError as e:
345  if self._DEBUG:
346  print(e)
347  info["command_line"] = ""
348 
349  try:
350  cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
351  cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
352  info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
353  except IndexError as e:
354  if self._DEBUG:
355  print(e)
356  info["command_line"] = ""
357 
358  return info
359 
360 
362  #make it general, for whatever test comes...
363  test = {}
364 
365  parsing_rules = (
366  (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
367  #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
368  (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
369  (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
370  # for this we cannot guarrantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
371  (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
372  #not shure if event content is required
373  (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
374  #TODO: after changeing the splitter to "taskset -c ..." this is no longer included into the part of correct job
375  #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
376  )
377 
378 
379  lines = self.lines_other
380  """
381 
382  for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file:
383  * beginning ->> and start timestamp- the firstone:
384  Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each
385  Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
386  Mon Jun 14 20:06:54 2010
387 
388  <... whatever might be here, might overlap with other test start/end messages ..>
389 
390  Mon Jun 14 21:59:33 2010
391  IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4
392 
393  * ending - the last timestamp "before is done running ...."
394  """
395  # we take the first TimeStamp after the starting message and the first before the finishing message in 2 rounds..
396 
397  #TODO: if threads would be changed it would stop working!!!
398 
399  # i.e. Memcheck, cpu, events
400  reSubmit = re.compile(r"""^Let's submit (.+) test on core (\d+)$""")
401 
402  reStart = re.compile(r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
403 
404  # i.e. Memcheck, thread name,id,core number
405  reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
406 
407  reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
408 
409  reWaiting = re.compile(r"""^Waiting for tests to be done...$""")
410 
411  reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
412  """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
413 
414  jobs = []
415 
416  #can split it into jobs ! just have to reparse it for the exit codes later....
417  for line_index in xrange(0, len(lines)):
418  line = lines[line_index]
419  if reSubmit.match(line):
420  end_index = self.findLineAfter(line_index, lines, test_condition=lambda l: reWaiting.match(l), return_index = True)
421  jobs.append(lines[line_index:end_index])
422 
423  for job_lines in jobs:
424  #print job_lines
425  info = self._applyParsingRules(parsing_rules, job_lines)
426  #Fixing here the compatibility with new cmsdriver.py --conditions option
427  #(for which now we have autoconditions and FrontierConditions_GlobalTag is optional):
428  if 'auto:' in info['conditions']:
429  from Configuration.AlCa.autoCond import autoCond
430  info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
431  else:
432  if 'FrontierConditions_GlobalTag' in info['conditions']:
433  info['conditions']=info['conditions'].split(",")[1]
434 
435  steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
436  steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
437  #probably it includes steps until we found *Candle... ?
438  steps = job_lines[steps_start + 1:steps_end]
439  if not self.validateSteps(steps):
440  self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))
441 
442  """ quite nasty - just a work around """
443  print("Trying to recover from this error in case of old cmssw")
444 
445  """ we assume that steps are between the following sentance and a TimeStamp """
446  steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
447  steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)
448 
449  steps = job_lines[steps_start + 1:steps_end]
450  if not self.validateSteps(steps):
451  self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
452  else:
453  print("RECOVERY SEEMS to be successful: %s" % str(steps))
454 
455  info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","
456 
457  start_id_index = self.findLineAfter(0, job_lines, test_condition = reStart.match, return_index = True)
458  pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
459  info["testname"] = testName
460 
461  thread_id_index = self.findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index = True)
462  info["start"] = self.firstTimeStampAfter(thread_id_index, job_lines)
463 
464  thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
465  info["thread_id"] = thread_id
466 
467  if testName not in test:
468  test[testName] = []
469  test[testName].append(info)
470 
471  for line_index in xrange(0, len(lines)):
472  line = lines[line_index]
473 
474  if reEnd.match(line):
475  testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
476  time = self.firstTimeStampBefore(line_index, lines)
477  try:
478  exit_code = ""
479  #we search for the exit code
480  line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
481  exit_code, = reExitCode.match(line_exitcode).groups()
482  except Exception as e:
483  print("Error while getting exit code (Other test): %s" + str(e))
484 
485  for key, thread in test.items():
486  for i in range(0, len(thread)):
487  if thread[i]["thread_id"] == thread_id:
488  thread[i].update({"end": time, "exit_code": exit_code})
489  break
490 
491  return test
492 
493 
def parseTimeSize(self):
    """Parse the TimeSize section (self.lines_timesize).

    The section is cut into jobs at every line starting with
    "taskset -c <n> cmsRelvalreportInput.py". For each job the parsing rules
    are applied and the following is added to the job's info dictionary:
      * "start"     - first timestamp of the job
      * "end"       - last timestamp before the "Individual ... ExitCode <n>" line
      * "exit_code" - the exit code taken from that line
      * "steps"     - the steps, joined by _LINE_SEPARATOR

    Returns {"TimeSize": [info, ...]}.
    Raises ValueError when a job has no "Individual ... ExitCode" line.

    Structure of one job in the log:
        Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        Candle MinBias will be PROCESSED
        You defined your own steps to run:
        RAW2DIGI-RECO
        *Candle MinBias
        Written out cmsRelvalreport.py input file at:
        /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
        Thu Aug 13 14:53:37 2009 [start]
        <....>
        Thu Aug 13 16:04:48 2009 [end]
        Individual cmsRelvalreport.py ExitCode 0
    """
    timesize_result = []

    #TODO: do we have to save the name of input file somewhere?
    #TODO: do we need the cmsDriver --conditions? I suppose it would the global per work directory = 1 perfsuite run (so samefor all candles in one work dir)
    # TODO: which candle definition to use?

    # divide into separate jobs
    lines = self.lines_timesize
    jobs = []
    # None (not False/0) marks "no job started yet", so that a job starting
    # on the very first line (index 0) is not lost
    start = None
    timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
    for line_index, line in enumerate(lines):
        # search for start of each TimeSize job (with a certain candle and step)
        if timesize_start_indicator.match(line):
            if start is not None:
                jobs.append(lines[start:line_index])
            start = line_index
    # add the last one (the whole section when no start indicator was seen)
    jobs.append(lines[start:])

    parsing_rules = (
        (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
        #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
        (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
        # for this we cannot guarantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
        (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
        # not sure if event content is required
        (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        #TODO: after changing the splitter to "taskset -c ..." this is no longer included into the part of correct job
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
    )

    # parse each of the TimeSize jobs: find candles, etc and start-end times
    # On older files we have "Individual Relvalreport.py ExitCode 0" instead of
    # "Individual cmsRelvalreport.py ExitCode", hence the generic first group.
    reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")

    if self._DEBUG:
        print("TimeSize (%d) jobs: %s" % (len(jobs), str(jobs)))

    for job_lines in jobs:
        info = self._applyParsingRules(parsing_rules, job_lines)
        # Compatibility with the new cmsdriver.py --conditions option (autoconditions;
        # FrontierConditions_GlobalTag is optional):
        if 'auto:' in info['conditions']:
            from Configuration.AlCa.autoCond import autoCond
            info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
        elif 'FrontierConditions_GlobalTag' in info['conditions']:
            info['conditions'] = info['conditions'].split(",")[1]

        # "Written out cmsRelvalreport.py input file at:" is not available on one
        # of the releases; instead the first timestamp of the job is the start time
        info["start"] = self.firstTimeStampAfter(0, job_lines)

        #TODO: improve in future (in case of some changes) we could use findBefore instead which uses the regexp as parameter for searching
        # end time - the timestamp before the exit code line
        end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)
        if end_time_before is None:
            raise ValueError("No 'Individual ... ExitCode' line found for TimeSize job: %s" % str(job_lines))

        # on the same line we have the exit code - so let's get it
        exit_code = reExit_code.match(job_lines[end_time_before]).group(2)

        info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
        info["exit_code"] = exit_code

        steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
        steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
        # probably it includes steps until we found *Candle... ?
        steps = job_lines[steps_start + 1:steps_end]
        if not self.validateSteps(steps):
            self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

            # quite nasty - just a work around
            print("Trying to recover from this error in case of old cmssw")

            # we assume that steps are between the following sentence and a TimeStamp
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
            steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
            else:
                print("RECOVERY SEEMS to be successful: %s" % str(steps))

        info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","

        timesize_result.append(info)
    return {"TimeSize": timesize_result}
614  #TODO:
615 
616 
617 
def readCmsScimarkTest(self, testName, testType, core):
    """Collect the composite scores from `<testName>.log` in self._path.

    Every line matched by self.reCmsScimarkTest yields one dictionary with
    the keys "score", "type" (= testType), "core" (= core) and
    "messurement_number" (1-based position among the scores found).
    """
    scores = []
    for line in self.readInput(self._path, fileName = testName + ".log"):
        found = self.reCmsScimarkTest.match(line)
        if not found:
            continue
        entry = {"score": found.groups()[1], "type": testType, "core": core}
        # the measurements are numbered in the order they appear in the file
        entry.update({"messurement_number": len(scores) + 1})
        scores.append(entry)
    return scores
629 
def readCmsScimark(self, main_cores = None):
    """Read the composite scores of all cmsScimark logs found in self._path.

    `main_cores` is a sequence whose first element is used as the core number
    of the two main-core benchmarks (cmsScimark2 and cmsScimark2_large);
    it defaults to [1].
    The cores not used by the suite are discovered from the files named
    cmsScimark_<number>.log and are processed in ascending core order.

    Returns the concatenated list of score dictionaries produced by
    readCmsScimarkTest.
    """
    if main_cores is None:
        main_cores = [1]
    main_core = main_cores[0]
    #TODO: WE DO NOT ALWAYS REALLY KNOW THE MAIN CORE NUMBER! but we don't care too much
    # we parse each of the SciMark files and the Composite scores
    csimark = []
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))

    # we do not always know the number of cores available, so we just search
    # the directory to find out the core numbers
    reIsCsiMark_notusedcore = re.compile(r"^cmsScimark_(\d+)\.log$")
    core_numbers = []
    for f in os.listdir(self._path):
        found = reIsCsiMark_notusedcore.match(f)
        if found and os.path.isfile(os.path.join(self._path, f)):
            core_numbers.append(found.group(1))
    # os.listdir gives no ordering guarantee; sort for a reproducible result
    core_numbers.sort(key=int)

    for core_number in core_numbers:
        try:
            csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" % str(core_number), core = core_number))
        except IOError as e:
            if self._DEBUG:
                print(e)
    return csimark
653  #print csimark
654 
656  """
657  checks if the suite has successfully finished
658  and if the tarball was successfully archived and uploaded to the castor """
659 
660  parsing_rules = (
661  (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
662  (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
663  (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
664  #TODO: WE MUST HAVE THE CASTOR URL, but for some of files it's not included [probably crashed]
665  (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
666  (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
667  )
668 
669 
670  """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
671  info = self._applyParsingRules(parsing_rules, self.lines_other)
672 
673  """ did we detect any errors in log files ? """
674  info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
675  if not info["successfully_archived_tarball"]:
676  info["castor_file_url"] = ""
677 
678  if not info["castor_file_url"]:
679  #TODO: get the castor file url or abort
680  self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
681  lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")
682 
683  url = ""
684  try:
685  #print "HERE!"
686  url=self.get_tarball_fromlog()
687  print("Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url)
688 
689  except:
690  if "PERFDB_CASTOR_FILE_URL" in os.environ:
691  url = os.environ["PERFDB_CASTOR_FILE_URL"]
692 
693  else: #FIXME: add the possibility to get it directly from the cmsPerfSuite.log file (make sure it is dumped there before doing the tarball itself...)
694  print("Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL")
695  self.handleParsingError( "Castor tarball URL not found. Provide interactively")
696 
697  while True:
698 
699  if lmdb_castor_url_is_valid(url):
700  info["castor_file_url"] = url
701  break
702  print("Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball")
703  if os.isatty(0): url = sys.stdin.readline()
704  else: raise IOError("stdin is closed.")
705 
706 
707  return info
709  '''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
710  print("Getting the url from the cmsPerfSuite.log")
711  log=open("cmsPerfSuite.log","r")
712  castor_dir="UNKNOWN_CASTOR_DIR"
713  tarball="UNKNOWN_TARBALL"
714  for line in log.readlines():
715  if 'castordir' in line:
716  castor_dir=line.split()[1]
717  if 'tgz' in line and tarball=="UNKNOWN_TARBALL": #Pick the first line that contains the tar command...
718  if 'tar' in line:
719  tarball=os.path.basename(line.split()[2])
720  castor_tarball=os.path.join(castor_dir,tarball)
721  return castor_tarball
722 
def parseAll(self):
    """Run all the parsers and return the combined result.

    The returned dictionary has the keys:
      * "General"           - general info, machine info and completion info
      * "TestResults"       - results of parseTimeSize and parseAllOtherTests
      * "cmsSciMark"        - the list returned by readCmsScimark
      * "unrecognized_jobs" - always an empty list here

    Exceptions raised by parseTimeSize / parseAllOtherTests are printed and
    otherwise ignored, so that the remaining information is still returned.
    Missing required fields are reported through handleParsingError.
    """
    result = {"General": {}, "TestResults": {}, "cmsSciMark": {}, 'unrecognized_jobs': []}

    # all the general info - start, arguments, host etc
    result["General"].update(self.parseGeneralInfo())

    # machine info - cpu, memory
    result["General"].update(self.getMachineInfo())

    # info about how successful the run was, when it finished and the final castor url
    result["General"].update(self.parseTheCompletion())

    print("Parsing TimeSize runs...")
    if len(self.lines_timesize) > 0:
        try:
            result["TestResults"].update(self.parseTimeSize())
        except Exception as e:
            print("BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " + str(e))

    print("Parsing Other(IgProf, Memcheck, ...) runs...")
    try:
        result["TestResults"].update(self.parseAllOtherTests())
    except Exception as e:
        print("BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " + str(e))

    #TODO: temporarily hard-coded - search for cores, use regexp.
    # (The parsed "run_on_cpus" / "num_cores" values were read here but never
    # used; the reads were removed so that a log without them cannot fail here.)
    main_cores = [1]

    # THE MACHINE SCIMARKS
    result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)

    if self.missing_fields:
        self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= " + str(self.missing_fields))

    return result
765 
766 
767 
768 if __name__ == "__main__":
769  from xml.dom import minidom
770  import cmssw_exportdb_xml
771  #steps do not get parsed corectly
772  #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_1_0_pre7_--usersteps=RAW2DIGI-RECO_lxbuild107.cern.ch_relval/relval/CMSSW_3_1_0_pre7/work2"
773  #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI"
774  #includes finishing time, succesfully archived tarball etc
775  #path = "/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PileUp"
776  path = os.path.abspath(".") #Better to point to the local dir than to some old Vidmantas' laptop dirs ;)
777  #p = parserPerfsuiteMetadata("/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PerfsuiteRun")
779  run_info = p.parseAll()
780 
781  #print "======= GENERAL ========= "
782  #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["General"].items())
783  #print "======= Test results ========= "
784  #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["TestResults"].items())
785 
786  xml_doc = minidom.Document()
787  cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)
788  #print "General info:" + str(p.parseGeneralInfo())
789  import doctest
790  doctest.testmod()
791 
792  #print p.readCmsScimark()
793 
794 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def findLineAfter(self, line_index, lines, test_condition, return_index=False)
def readInput(self, path, fileName="cmsPerfSuite.log")
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def exportRunInfo(xml_doc, run_info, release=None, print_out=False)
def readCmsScimarkTest(self, testName, testType, core)
def findLineBefore(self, line_index, lines, test_condition)
#define update(a, b)
def rulesParser(parsing_rules, lines, compileRules=True)
#define str(s)
double split
Definition: MVATrainer.cc:139
T get(const Candidate &c)
Definition: component.h:55