CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
parserPerfsuiteMetadata.py
Go to the documentation of this file.
1 import re
2 import os, sys
3 import time
4 import parsingRulesHelper
5 import glob
6 from commands import getstatusoutput
7 
9  """
10  The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
11 
12  * General info
13  As most of the info are simple one line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with tuple of field names for general info which will be filled in. in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling simple function _applyParsingRules which checks each lines with each if one passes another, if it does fills in the result dictionary with the result.
 14  Additionally we get the cpu and memory info from /proc/cpuinfo /proc/meminfo
15 
16  * TimeSize test
17  We use the same technique a little bit also. But at first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules, also we find the starting and ending times (i.e. We know that start timestamp is somethere after certain line containing "Written out cmsRelvalreport.py input file at:")
18 
19  * All other tests
 20  We find the statement that the test is being launched (containing the test name, core and number of events). Above it we have the thread number, and below it the starting time.
 21  The ending time can ONLY be connected to the starting time via the Thread-ID. The problem is that the log file names the same test instance differently, e.g. <Launching "PILE UP Memcheck"> versus <"Memcheck" stopped>.
22  """
23  _LINE_SEPARATOR = "|"
24  def validateSteps(self, steps):
25  """ Simple function for error detection. TODO: we could use a list of possible steps also """
26  return not (not steps or len(steps) > self._MAX_STEPS)
27 
28  def __init__(self, path):
29 
30  self._MAX_STEPS = 5 # MAXIMUM NUMBER OF STEPS PER RUN (taskset relvalreport.py...)
31  self._DEBUG = False
32 
33 
34  self._path = path
35 
36  """ some initialisation to speedup the other functions """
37  #for cmsscimark
38  self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")
39 
40  #TimeSize
41  """ the separator for beginning of timeSize / end of general statistics """
42  self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
43  """ (the first timestamp is the start of TimeSize) """
44 
45 
46  """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """
47  self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")
48 
49  #Other tests:
50  self._otherStart = re.compile(r"^Preparing")
51 
52  """
53  ----- READ THE DATA -----
54  """
55  lines = self.readInput(path)
56  """ split the whole file into parts """
57  #Let's not assume there are ALWAYS TimeSize tests in the runs of the Performance Suite!:
58  #Check first:
59  #FIXME: Vidmantas did not think to this case... will need to implement protectionb against it for all the IB tests...
60  #To do as soon as possible...
61  #Maybe revisit the strategy if it can be done quickly.
62  timesize_end= [lines.index(line) for line in lines if self._timeSizeEnd.match(line)]
63  if timesize_end:
64  timesize_end_index = timesize_end[0]
65  else:
66  timesize_end_index=0
67  timesize_start=[lines.index(line) for line in lines if self._timeSizeStart.match(line)]
68  general_stop=[lines.index(line) for line in lines if self._otherStart.match(line)]
69  if timesize_start:
70  timesize_start_index = timesize_start[0]
71  general_stop_index=timesize_start_index
72  elif general_stop:
73  timesize_start_index=0
74  general_stop_index=general_stop[0]
75  else:
76  timesize_start_index=0
77  general_stop_index=-1
78 
79  """ we split the structure:
80  * general
81  * timesize
82  * all others [igprof etc]
83  """
84 
85  """ we get the indexes of spliting """
86  #Not OK to use timsize_start_index for the general lines... want to be general, also to cases of no TimeSize tests...
87  #self.lines_general = lines[:timesize_start_index]
88  self.lines_general = lines[:general_stop_index]
89  self.lines_timesize = lines[timesize_start_index:timesize_end_index+1]
90  self.lines_other = lines[timesize_end_index:]
91 
92  """ a list of missing fields """
93  self.missing_fields = []
94 
95  @staticmethod
96  def isTimeStamp(line):
97  """
98  Returns whether the string is a timestamp (if not returns None)
99 
100  >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
101  True
102  >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")
103 
104  """
105  datetime_format = "%a %b %d %H:%M:%S %Y" # we use default date format
106  try:
107  time.strptime(line, datetime_format)
108  return True
109  except ValueError:
110  return None
111 
112  @staticmethod
113  def findFirstIndex_ofStartsWith(job_lines, start_of_line):
114  return [job_lines.index(line)
115  for line in job_lines
116  if line.startswith(start_of_line)][0]
117 
118  def findLineBefore(self, line_index, lines, test_condition):
119  """ finds a line satisfying the `test_condition` comming before the `line_index` """
120  # we're going backwards the lines list
121  for line_index in xrange(line_index -1, -1, -1):
122  line = lines[line_index]
123 
124  if test_condition(line):
125  return line
126  raise ValueError
127 
128 
129  def findLineAfter(self, line_index, lines, test_condition, return_index = False):
130  """ finds a line satisfying the `test_condition` comming after the `line_index` """
131  # we're going forward the lines list
132  for line_index in xrange(line_index + 1, len(lines)):
133  line = lines[line_index]
134 
135  if test_condition(line):
136  if return_index:
137  return line_index
138  return line
139 
140  def firstTimeStampBefore(self, line_index, lines):
141  """ returns the first timestamp BEFORE the line with given index """
142 
143  return self.findLineBefore(line_index, lines, test_condition = self.isTimeStamp)
144 
145  def firstTimeStampAfter(self, line_index, lines):
146  """ returns the first timestamp AFTER the line with given index """
147 
148  return self.findLineAfter(line_index, lines, test_condition = self.isTimeStamp)
149 
150  def handleParsingError(self, message):
151  if self._DEBUG:
152  raise ValueError, message
153  print " ======== AND ERROR WHILE PARSING METADATA ===="
154  print message
155  print " =============== end ========================= "
156 
157  #IgProf_Perf, IgProf_Mem, Memcheck, Callgrind
158  #TODO: divide the input using separators
159 
160  """ reads the input cmsPerfsuite.log file """
161  def readInput(self, path, fileName = "cmsPerfSuite.log"):
162  try:
163  f = open(os.path.join(path, fileName), "r")
164  lines = [s.strip() for s in f.readlines()]
165  f.close()
166  except IOError:
167  lines = []
168 
169  #print self._lines
170  return lines
171 
172 
173 
174 
175  def getMachineInfo(self):
176  """ Returns the cpu and memory info """
177 
178  """ cpu info """
179 
180  """
181  we assume that:
182  * num_cores = max(core id+1) [it's counted from 0]
183  * 'model name' is processor type [we will return only the first one - we assume others to be same!!??
184  * cpu MHz - is the speed of CPU
185  """
186  #TODO: BUT cpu MHz show not the maximum speed but current,
187  """
188  for
189  model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
190  cpu MHz : 800.000
191  cache size : 6144 KB
192  """
193  cpu_result = {}
194  try:
195  f= open(os.path.join(self._path, "cpuinfo"), "r")
196 
197  #we split data into a list of tuples = [(attr_name, attr_value), ...]
198  cpu_attributes = [l.strip().split(":") for l in f.readlines()]
199  #print cpu_attributes
200  f.close()
201  cpu_result = {
202  "num_cores": max ([int(attr[1].strip())+1 for attr in cpu_attributes if attr[0].strip() == "processor"]), #Bug... Vidmantas used "core id"
203  "cpu_speed_MHZ": max ([attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cpu MHz"]),
204  "cpu_cache_size": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cache size"][0],
205  "cpu_model_name": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "model name"][0]
206  }
207  except IOError,e:
208  print e
209 
210 
211 
212 
213 
214  """ memory info """
215  mem_result = {}
216 
217  try:
218  f= open(os.path.join(self._path, "meminfo"), "r")
219 
220  #we split data into a list of tuples = [(attr_name, attr_value), ...]
221  mem_attributes = [l.strip().split(":") for l in f.readlines()]
222 
223  mem_result = {
224  "memory_total_ram": [attr[1].strip() for attr in mem_attributes if attr[0].strip() == "MemTotal"][0]
225  }
226 
227  except IOError,e:
228  print e
229 
230  cpu_result.update(mem_result)
231  return cpu_result
232 
233 
234 
def _applyParsingRules(self, parsing_rules, lines):
    """Apply the regexp parsing rules to `lines` and collect the matches.

    Each rule is a tuple:
        ((field_name_1, field_name_2, ...), regexp, /optionally "req"/)
    A line matching the regexp fills the named fields with the captured
    groups. Fields flagged "req" that were never matched are appended to
    self.missing_fields. Returns the dictionary of matched fields.
    """
    # delegate to the shared parsing helper, compiling the rules on the fly
    info, missing_fields = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules=True)
    self.missing_fields.extend(missing_fields)
    return info
253 
254 
def parseGeneralInfo(self):
    """Parse the general-information section of the log.

    Applies the one-line matching rules to self.lines_general, then
    post-processes the candle lists, the CVS tag table and the command
    line into single string fields. Returns the resulting dictionary.

    Fixes: the second command-line fallback used to clear
    info["command_line"] instead of info["command_line_parsed"]
    (copy-paste bug); iteration over a copy of info.items() so `del` is
    safe; the bare `except:` is narrowed to IndexError; xrange -> range
    and `except X as e` for forward compatibility.
    """
    lines = self.lines_general
    # Tuples of rules: (fields to fill, regexp, /optionally "req"/).
    # TIP: don't forget that a tuple of one element ends with a comma
    parsing_rules = (
        (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
        (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
        (("architecture",) ,r"""^Current Architecture is (.+)$"""),
        (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
        (("base_release_path",) , r"""^Base Release in: (.+)$"""),
        (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),

        (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),

        (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
        (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
        (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
        (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),

        (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
        (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),

        (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
        (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),

        (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
        (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),

        (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
        (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),

        (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
        (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
        (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),

        (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
    )
    # extract the simple one-line fields
    info = self._applyParsingRules(parsing_rules, lines)

    # --- postprocess the candles list ---
    candles = {}
    # iterate over a COPY of the items: we delete keys from info in the loop
    for field, value in list(info.items()):
        if field.startswith("candles_"):
            test = field.replace("candles_", "")
            value = [v.strip(" '") for v in value.split(",")]
            candles[test] = value
            del info[field]
    info["candles"] = self._LINE_SEPARATOR.join([k + ":" + ",".join(v) for (k, v) in candles.items()])

    # --- TAGS ---
    # The tag table in the log looks like:
    #   --- Tag ---    --- RelTag --- -------- Package --------
    #   HEAD           V05-03-06      IgTools/IgProf
    #   V01-06-05      V01-06-04      Validation/Performance
    #   ---------------------------------------
    #   total packages: 2 (2 displayed)
    tags_start_index = -1  # default: not found
    try:
        tags_start_index = [i for i in range(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
    except IndexError:
        pass
    if tags_start_index > -1:
        tags_end_index = [i for i in range(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
        # +2: include the separator line and the "total packages" line
        tags = lines[tags_start_index:tags_end_index + 2]
    else:
        # no tags found, make an empty list
        tags = []
    # join the tags with the separator to store them as a simple string
    info["tags"] = self._LINE_SEPARATOR.join(tags)

    # --- command line ---
    try:
        cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1  # that's the next line
        info["command_line"] = lines[cmd_index]
    except IndexError as e:
        if self._DEBUG:
            print(e)
        info["command_line"] = ""

    try:
        cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
        cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
        info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
    except IndexError as e:
        if self._DEBUG:
            print(e)
        # BUGFIX: the original cleared info["command_line"] here by mistake
        info["command_line_parsed"] = ""

    return info
358 
359 
361  threads = {}
362  tests = {
363  #"IgProf_Perf": {}, "IgProf_Mem": {}, "Memcheck": {}, "Callgrind": {},
364  }
365 
366  lines = self.lines_other
367  """
368 
369  for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file:
370  * beginning ->> and start timestamp- the firstone:
371  Adding thread <simpleGenReportThread(Thread-1, started)> to the list of active threads
372  Launching the Memcheck tests on cpu 3 with 5 events each
373  Fri Aug 14 01:16:03 2009
374 
375  <... whatever might be here, might overlap with other test start/end messages ..>
376 
377  Fri Aug 14 02:13:18 2009
378  Memcheck test, in thread <simpleGenReportThread(Thread-1, stopped)> is done running on core 3
379  * ending - the last timestamp "before is done running ...."
380  """
381  # we take the first TimeStamp after the starting message and the first before the finishing message
382 
383 
384  #TODO: if threads would be changed it would stop working!!!
385 
386  # i.e. Memcheck, cpu, events
387  reStart = re.compile(r"""^Launching the (.*) tests on cpu (\d+) with (\d+) events each$""")
388  # i.e. Memcheck, thread name,core number
389  reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped\)> is done running on core (\d+)$""")
390 
391  #i.e. thread = Thread-1
392  reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started\)> to the list of active threads$""")
393 
394  reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
395  """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
396  for line_index in xrange(0, len(lines)):
397  line = lines[line_index]
398 
399  # * starting of test
400  if reStart.match(line):
401  #print reStart.match(line).groups()
402  testName, testCore, testEventsNum = reStart.match(line).groups()
403 
404  time = self.firstTimeStampAfter(line_index, lines)
405 
406  #find the name of Thread: it's one of the lines before
407  line_thread = self.findLineBefore(line_index, lines, test_condition=lambda l: reAddThread.match(l))
408  (thread_id, ) = reAddThread.match(line_thread).groups()
409 
410  #we add it to the list of threads as we DO NOT KNOW EXACT NAME OF TEST
411  if not threads.has_key(thread_id):
412  threads[thread_id] = {}
413  # this way we would get an Exception in case of unknown test name!
414  threads[thread_id].update({"name": testName, "events_num": testEventsNum, "core": testCore, "start": time, "thread_id": thread_id})
415 
416  # * or end of test
417  if reEnd.match(line):
418  testName, thread_id, testCore = reEnd.match(line).groups()
419  if not threads.has_key(testName):
420  threads[thread_id] = {}
421  #TODO: we get an exception if we found non existing
422 
423  time = self.firstTimeStampBefore(line_index, lines)
424  try:
425  exit_code = ""
426  #we search for the exit code
427  line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
428  exit_code, = reExitCode.match(line_exitcode).groups()
429  except Exception, e:
430  print "Error while getting exit code (Other test): %s" + str(e)
431 
432 
433  # this way we would get an Exception in case of unknown test name! So we would be warned if the format have changed
434  threads[thread_id].update({"end": time, "exit_code":exit_code})
435  for key, thread in threads.items():
436  tests[thread["name"]] = thread
437  return tests
438 
439 
def parseTimeSize(self):
    """ parses the timeSize """
    timesize_result = []

    # TODO: we will use the first timestamp after the "or these tests will use user input file..."
    #TODO: do we have to save the name of input file somewhere?
    """
    the structure of input file:
    * beginning ->> and start timestamp- the firstone:
    >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
    <...>
    Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
    Candle MinBias will be PROCESSED
    You defined your own steps to run:
    RAW2DIGI-RECO
    *Candle MinBias
    Written out cmsRelvalreport.py input file at:
    /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
    Thu Aug 13 14:53:37 2009 [start]
    <....>
    Thu Aug 13 16:04:48 2009 [end]
    Individual cmsRelvalreport.py ExitCode 0
    * ending - the last timestamp "... ExitCode ...."
    """
    #TODO: do we need the cmsDriver --conditions? I suppose it would the global per work directory = 1 perfsuite run (so samefor all candles in one work dir)
    # TODO: which candle definition to use?
    """ divide into separate jobs """
    lines = self.lines_timesize
    jobs = []
    # `start` doubles as a flag: False until the first job header is seen,
    # then it holds the index where the current job's lines begin
    start = False
    timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
    for line_index in xrange(0, len(lines)):
        line = lines[line_index]
        # search for start of each TimeSize job (with a certain candle and step)
        if timesize_start_indicator.match(line):
            if start:
                # close the previous job at the line before this new header
                jobs.append(lines[start:line_index])
            start = line_index
    #add the last one
    jobs.append(lines[start:len(lines)])
    #print "\n".join(str(i) for i in jobs)

    # rules applied to each job's lines; see _applyParsingRules for the format
    parsing_rules = (
        (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
        #e.g.: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
        (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
        (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
        # for this we cannot guarrantee that it has been found, TODO: we might count the number of pileup candles and compare with arguments
        (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
        #not shure if event content is required
        (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        #TODO: after changeing the splitter to "taskset -c ..." this is no longer included into the part of correct job
        #(("input_user_root_file", ), r"""^For these tests will use user input file (.+)$"""),
    )

    #parse each of the TimeSize jobs: find candles, etc and start-end times
    reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")

    if self._DEBUG:
        print "TimeSize (%d) jobs: %s" % (len(jobs), str(jobs))

    for job_lines in jobs:
        """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
        info = self._applyParsingRules(parsing_rules, job_lines)
        #Fixing here the compatibility with new cmsdriver.py --conditions option (for which now we have autoconditions and FrontierConditions_GlobalTag is optional):
        if 'auto:' in info['conditions']:
            from Configuration.PyReleaseValidation.autoCond import autoCond
            info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
        else:
            if 'FrontierConditions_GlobalTag' in info['conditions']:
                info['conditions']=info['conditions'].split(",")[1]

        #DEBUG:
        #print "CONDITIONS are: %s"%info['conditions']

        #start time - the index after which comes the time stamp
        """ the following is not available on one of the releases, instead
        use the first timestamp available on our job - that's the starting time :) """
        #start_time_after = self.findFirstIndex_ofStartsWith(job_lines, "Written out cmsRelvalreport.py input file at:")
        #print start_time_after
        info["start"] = self.firstTimeStampAfter(0, job_lines)

        #TODO: improve in future (in case of some changes) we could use findBefore instead which uses the regexp as parameter for searching
        #end time - the index before which comes the time stamp

        # On older files we have - "Individual Relvalreport.py ExitCode 0" instead of "Individual cmsRelvalreport.py ExitCode"
        end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)

        # on the same line we have the exit Code - so let's get it
        nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()

        info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
        info["exit_code"] = exit_code

        # the steps are listed between the "You defined your own steps" line
        # and the "*Candle" line
        steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
        steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
        #probably it includes steps until we found *Candle... ?
        steps = job_lines[steps_start + 1:steps_end]
        if not self.validateSteps(steps):
            self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

            """ quite nasty - just a work around """
            print "Trying to recover from this error in case of old cmssw"

            """ we assume that steps are between the following sentance and a TimeStamp """
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
            steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
            else:
                print "RECOVERY SEEMS to be successful: %s" % str(steps)

        info["steps"] = self._LINE_SEPARATOR.join(steps) #!!!! STEPS MIGHT CONTAIN COMMA: ","

        timesize_result.append(info)
    return {"TimeSize": timesize_result}
560  #TODO:
561 
562 
563 
564  def readCmsScimarkTest(self, testName, testType, core):
565  lines = self.readInput(self._path, fileName = testName + ".log")
566  scores = [{"score": self.reCmsScimarkTest.match(line).groups()[1], "type": testType, "core": core}
567  for line in lines
568  if self.reCmsScimarkTest.match(line)]
569  #add the number of messurment
570  i = 0
571  for score in scores:
572  i += 1
573  score.update({"messurement_number": i})
574  return scores
575 
576  def readCmsScimark(self, main_cores = [1]):
577  main_core = main_cores[0]
578  #TODO: WE DO NOT ALWAYS REALLY KNOW THE MAIN CORE NUMBER! but we don't care too much
579  #we parse each of the SciMark files and the Composite scores
580  csimark = []
581  csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
582  csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))
583 
584 
585  #we not always know the number of cores available so we will just search the directory to find out core numbers
586  reIsCsiMark_notusedcore = re.compile("^cmsScimark_(\d+).log$")
587  scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
588  for f in os.listdir(self._path)
589  if reIsCsiMark_notusedcore.match(f)
590  and os.path.isfile(os.path.join(self._path, f)) ]
591 
592  for core_number in scimark_files:
593  try:
594  csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" %str(core_number), core = core_number))
595  except IOError, e:
596  if self._DEBUG:
597  print e
598  return csimark
599  #print csimark
600 
601  #get IgProf summary information from the sql3 files
602  def getIgSummary(self):
603  igresult = []
604  globbed = glob.glob(os.path.join(self._path, "../*/IgProfData/*/*/*.sql3"))
605 
606  for f in globbed:
607  #print f
608  profileInfo = self.getSummaryInfo(f)
609  if not profileInfo:
610  continue
611  cumCounts, cumCalls = profileInfo
612  dump, architecture, release, rest = f.rsplit("/", 3)
613  candle, sequence, pileup, conditions, process, counterType, events = rest.split("___")
614  events = events.replace(".sql3", "")
615  igresult.append({"counter_type": counterType, "event": events, "cumcounts": cumCounts, "cumcalls": cumCalls})
616 
617  return igresult
618 
619  def getSummaryInfo(self, database):
620  summary_query="""SELECT counter, total_count, total_freq, tick_period
621  FROM summary;"""
622  error, output = self.doQuery(summary_query, database)
623  if error or not output or output.count("\n") > 1:
624  return None
625  counter, total_count, total_freq, tick_period = output.split("@@@")
626  if counter == "PERF_TICKS":
627  return float(tick_period) * float(total_count), int(total_freq)
628  else:
629  return int(total_count), int(total_freq)
630 
def doQuery(self, query, database):
    """Pipe `query` into the sqlite3 binary for `database`.

    Output fields are separated by @@@. Returns the (status, output)
    pair from getstatusoutput. Prefers the system sqlite3 binary and
    falls back to the AFS copy.
    NOTE(review): query/database are interpolated into a shell command;
    fine for trusted local paths, unsafe for untrusted input.
    """
    sqlite = "/usr/bin/sqlite3" if os.path.exists("/usr/bin/sqlite3") else "/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
    return getstatusoutput("echo '%s' | %s -separator @@@ %s" % (query, sqlite, database))
637 
639  """
640  checks if the suite has successfully finished
641  and if the tarball was successfully archived and uploaded to the castor """
642 
643  parsing_rules = (
644  (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
645  (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
646  (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
647  #TODO: WE MUST HAVE THE CASTOR URL, but for some of files it's not included [probably crashed]
648  (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
649  (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
650  )
651 
652 
653  """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
654  info = self._applyParsingRules(parsing_rules, self.lines_other)
655 
656  """ did we detect any errors in log files ? """
657  info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
658  if not info["successfully_archived_tarball"]:
659  info["castor_file_url"] = ""
660 
661  if not info["castor_file_url"]:
662  #TODO: get the castor file url or abort
663  self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
664  lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")
665 
666  url = ""
667  try:
668  print "HERE!"
669  url=self.get_tarball_fromlog()
670  print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url
671 
672  except:
673  if os.environ.has_key("PERFDB_CASTOR_FILE_URL"):
674  url = os.environ["PERFDB_CASTOR_FILE_URL"]
675 
676  else: #FIXME: add the possibility to get it directly from the cmsPerfSuite.log file (make sure it is dumped there before doing the tarball itself...)
677  print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"
678  self.handleParsingError( "Castor tarball URL not found. Provide interactively")
679 
680  while True:
681 
682  if lmdb_castor_url_is_valid(url):
683  info["castor_file_url"] = url
684  break
685  print "Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball"
686  url = sys.stdin.readline()
687 
688 
689  return info
691  '''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
692  print "Getting the url from the cmsPerfSuite.log"
693  log=open("cmsPerfSuite.log","r")
694  castor_dir="UNKNOWN_CASTOR_DIR"
695  tarball="UNKNOWN_TARBALL"
696  for line in log.readlines():
697  if 'castordir' in line:
698  castor_dir=line.split()[1]
699  if 'tgz' in line and tarball=="UNKNOWN_TARBALL": #Pick the first line that contains the tar command...
700  if 'tar' in line:
701  tarball=os.path.basename(line.split()[2])
702  castor_tarball=os.path.join(castor_dir,tarball)
703  return castor_tarball
704 
def parseAll(self):
    """Parse the whole log and return one dictionary with the sections
    "General", "TestResults", "cmsSciMark", "IgSummary" and
    "unrecognized_jobs".

    Fixes: Python-3-compatible `except ... as e` / print syntax;
    behaviour otherwise unchanged.
    """
    result = {"General": {}, "TestResults": {}, "cmsSciMark": {}, "IgSummary": {}, 'unrecognized_jobs': []}

    # all the general info - start, arguments, host etc.
    result["General"].update(self.parseGeneralInfo())

    # machine info - cpu, memory
    result["General"].update(self.getMachineInfo())

    # how successful the run was, when it finished, and the final castor url
    result["General"].update(self.parseTheCompletion())

    try:
        result["TestResults"].update(self.parseTimeSize())
    except Exception as e:
        print("BAD BAD BAD UNHANDLED ERROR" + str(e))

    #TODO:
    #Check what Vidmantas was doing in the parseAllOtherTests, de facto it is not used now, so commenting it for now (to avoid the "BAD BAD BAD....")
    #try:
    #    result["TestResults"].update(self.parseAllOtherTests())
    #except Exception as e:
    #    print("BAD BAD BAD UNHANDLED ERROR" + str(e))

    main_cores = [result["General"]["run_on_cpus"]]
    num_cores = result["General"].get("num_cores", 0)
    # TODO: temporary -- the real core list should be extracted with a
    # regexp; for now the main core is hard-coded to 1
    main_cores = [1]

    # the machine scimarks
    result["cmsSciMark"] = self.readCmsScimark(main_cores=main_cores)
    result["IgSummary"] = self.getIgSummary()

    if self.missing_fields:
        self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= " + str(self.missing_fields))

    return result
748 
749 
750 
if __name__ == "__main__":
    from xml.dom import minidom
    import cmssw_exportdb_xml
    #steps do not get parsed corectly
    #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_1_0_pre7_--usersteps=RAW2DIGI-RECO_lxbuild107.cern.ch_relval/relval/CMSSW_3_1_0_pre7/work2"
    #path = "/home/vidma/Desktop/CERN_code/cmssw/data/CMSSW_3_2_0_--usersteps=GEN-SIM,DIGI_lxbuild106.cern.ch_relval/relval/CMSSW_3_2_0/workGENSIMDIGI"
    #includes finishing time, succesfully archived tarball etc
    #path = "/home/vidma/Desktop/CERN_code/cmssw/CVS_PerfSuiteDB/COMP/PerfSuiteDB/export_data_to_xml/example_of_files/PileUp"
    path = os.path.abspath(".")  # point to the local dir rather than old example dirs
    # NOTE(review): this construction line was missing from the rendered
    # source and has been reconstructed -- `p` is used below
    p = parserPerfsuiteMetadata(path)
    run_info = p.parseAll()

    #print "======= GENERAL ========= "
    #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["General"].items())
    #print "======= Test results ========= "
    #print "\n".join("%s : %s" % (k, v) for k, v in p.parseAll()["TestResults"].items())

    xml_doc = minidom.Document()
    cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out=True)
    #print "General info:" + str(p.parseGeneralInfo())
    import doctest
    doctest.testmod()

    #print p.readCmsScimark()
776 
777 
void strip(std::string &input, const std::string &blanks=" \n\t")
Definition: stringTools.cc:16
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
#define update(a, b)
double split
Definition: MVATrainer.cc:139
T get(const Candidate &c)
Definition: component.h:56