import re
import os
import sys
import time
import parsingRulesHelper
from commands import getstatusoutput
The parsing works as follows: we split the file into 3 parts (kept as 3 lists of lines: self.lines_general, self.lines_timesize, self.lines_other):
As most of the information consists of simple one-line strings, we define regular expressions matching each of those lines. Each regular expression is associated with the names of the data fields it extracts. E.g. ^Suite started at (.+) on (.+) by user (.+)$ matches only the line stating when the suite started and on which machine; it is associated with a tuple of field names for the general info, so we end up with info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by the helper function _applyParsingRules, which checks every line against every rule and, whenever one matches, fills in the result dictionary accordingly.
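A minimal, runnable sketch of how a single parsing rule fills the dictionary
(the host "lxbuild42" and user "foo" are hypothetical examples):

    import re
    rule_fields = ("start_time", "host", "user")
    rule_regexp = re.compile(r"^Suite started at (.+) on (.+) by user (.+)$")
    info = {}
    line = "Suite started at Fri Aug 14 01:16:03 2009 on lxbuild42 by user foo"
    match = rule_regexp.match(line)
    if match:
        info.update(zip(rule_fields, match.groups()))
    # info == {'start_time': 'Fri Aug 14 01:16:03 2009', 'host': 'lxbuild42', 'user': 'foo'}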
Additionally we get the CPU and memory info from /proc/cpuinfo and /proc/meminfo.
We use much the same technique here, but first we divide the TimeSize lines by job (an individual run of cmssw: per candle, and with/without pileup). Then we apply our parsing rules to each job and also find its starting and ending times (e.g. we know that the start timestamp comes shortly after the line containing "Written out cmsRelvalreport.py input file at:").
We find the line stating that the test is being launched (containing the test name, core, and number of events). Above it we have the thread number, and below it the starting time.
The ending time can ONLY be connected with the starting time via the Thread-ID. The problem is that the log names the same test instance differently, e.g. <Launching "PILE UP Memcheck"> vs. <"Memcheck" stopped>.
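E.g. only the shared Thread-ID ("Thread-1" below) ties the start and end lines together:
    Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
    ...
    IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4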
25 """ Simple function for error detection. TODO: we could use a list of possible steps also """
26 return not (
not steps
or len(steps) > self.
_MAX_STEPS)
36 """ some initialisation to speedup the other functions """
41 """ the separator for beginning of timeSize / end of general statistics """
42 self.
_timeSizeStart = re.compile(
r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
43 """ (the first timestamp is the start of TimeSize) """
46 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """
47 self.
_timeSizeEnd = re.compile(
r"""^Stopping all cmsScimark jobs now$""")
----- READ THE DATA -----
""" split the whole file into parts """
timesize_end = [lines.index(line) for line in lines if self._timeSizeEnd.match(line)]
timesize_end_index = timesize_end[0]
timesize_start = [lines.index(line) for line in lines if self._timeSizeStart.match(line)]
general_stop = [lines.index(line) for line in lines if self._otherStart.match(line)]
timesize_start_index = timesize_start[0]
general_stop_index = timesize_start_index
timesize_start_index = timesize_end_index + 1
general_stop_index = general_stop[0]
timesize_start_index = 0
79 """ we split the structure:
82 * all others [igprof etc]
85 """ we get the indexes of spliting """
92 """ a list of missing fields """
Returns whether the string is a timestamp (if not, returns None)

>>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
True
>>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")

datetime_format = "%a %b %d %H:%M:%S %Y"
try:
    time.strptime(line, datetime_format)
    return True
except ValueError:
    return None
return [job_lines.index(line) for line in job_lines if line.startswith(start_of_line)][0]
119 """ finds a line satisfying the `test_condition` comming before the `line_index` """
121 for line_index
in xrange(line_index -1, -1, -1):
122 line = lines[line_index]
124 if test_condition(line):
def findLineAfter(self, line_index, lines, test_condition, return_index = False):
    """ finds a line satisfying the `test_condition` coming after the `line_index` """
    for line_index in xrange(line_index + 1, len(lines)):
        line = lines[line_index]
        if test_condition(line):
141 """ returns the first timestamp BEFORE the line with given index """
146 """ returns the first timestamp AFTER the line with given index """
152 raise ValueError, message
153 print " ======== AND ERROR WHILE PARSING METADATA ===="
155 print " =============== end ========================= "
160 """ reads the input cmsPerfsuite.log file """
161 def readInput(self, path, fileName = "cmsPerfSuite.log"):
163 f = open(os.path.join(path, fileName),
"r")
164 lines = [s.strip() for s
in f.readlines()]
176 """ Returns the cpu and memory info """
182 * num_cores = max(core id+1) [it's counted from 0]
183 * 'model name' is processor type [we will return only the first one - we assume others to be same!!??
184 * cpu MHz - is the speed of CPU
189 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
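After l.strip().split(":") the sample line above becomes (illustrative):
    ['model name ', ' Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz']
so attr[0].strip() == "model name" and attr[1].strip() is the value.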
f = open(os.path.join(self._path, "cpuinfo"), "r")
cpu_attributes = [l.strip().split(":") for l in f.readlines()]
202 "num_cores": max ([int(attr[1].strip())+1
for attr
in cpu_attributes
if attr[0].strip() ==
"processor"]),
203 "cpu_speed_MHZ": max ([attr[1].strip()
for attr
in cpu_attributes
if attr[0].strip() ==
"cpu MHz"]),
204 "cpu_cache_size": [attr[1].strip()
for attr
in cpu_attributes
if attr[0].strip() ==
"cache size"][0],
205 "cpu_model_name": [attr[1].strip()
for attr
in cpu_attributes
if attr[0].strip() ==
"model name"][0]
f = open(os.path.join(self._path, "meminfo"), "r")
mem_attributes = [l.strip().split(":") for l in f.readlines()]
224 "memory_total_ram": [attr[1].strip()
for attr
in mem_attributes
if attr[0].strip() ==
"MemTotal"][0]
230 cpu_result.update(mem_result)
Applies the provided regular expression rules (rule[1] for each rule in parsing_rules)
to each line; if a rule matches the line, the matched information is put into the
dictionary under the specified keys (rule[0]), which is later returned.
rule[2] states whether the field is required to be found; if it is required but not found, an exception is raised.
Each rule looks like: ( (field_name_1_to_match, field_name_2), regular expression, /optionally: is the field required? if so, "req"/ )
245 """ we call a shared parsing helper """
250 self.missing_fields.extend(missing_fields)
257 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the
258 regexp while the second one is the regexp itself """
(("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
(("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
(("architecture",), r"""^Current Architecture is (.+)$"""),
(("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
(("base_release_path",), r"""^Base Release in: (.+)$"""),
(("test_release_local_path",), r"""^Your Test release in: (.+)$"""),

(("castor_dir",), r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),

(("TimeSize_events",), r"""^(\d+) TimeSize events$"""),
(("IgProf_events",), r"""^(\d+) IgProf events$"""),
(("CallGrind_events",), r"""^(\d+) Callgrind events$"""),
(("Memcheck_events",), r"""^(\d+) Memcheck events$"""),

(("candles_TimeSize",), r"""^TimeSizeCandles \[(.*)\]$"""),
(("candles_TimeSizePU",), r"""^TimeSizePUCandles \[(.*)\]$"""),

(("candles_Memcheck",), r"""^MemcheckCandles \[(.*)\]$"""),
(("candles_MemcheckPU",), r"""^MemcheckPUCandles \[(.*)\]$"""),

(("candles_Callgrind",), r"""^CallgrindCandles \[(.*)\]$"""),
(("candles_CallgrindPU",), r"""^CallgrindPUCandles \[(.*)\]$"""),

(("candles_IgProfPU",), r"""^IgProfPUCandles \[(.*)\]$"""),
(("candles_IgProf",), r"""^IgProfCandles \[(.*)\]$"""),

(("cmsScimark_before",), r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
(("cmsScimark_after",), r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
(("cmsDriverOptions",), r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),

(("HEPSPEC06_SCORE",), r"""^This machine's HEPSPEC06 score is: (.+)$"""),
296 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
300 """ postprocess the candles list """
for field, value in info.items():
    if field.startswith("candles_"):
        test = field.replace("candles_", "")
        value = [v.strip(" '") for v in value.split(",")]
info["candles"] = self._LINE_SEPARATOR.join([k + ":" + ",".join(v) for (k, v) in candles.items()])
--- Tag --- --- RelTag --- -------- Package --------
HEAD V05-03-06 IgTools/IgProf
V01-06-05 V01-06-04 Validation/Performance
---------------------------------------
total packages: 2 (2 displayed)
tags_start_index = -1
tags_start_index = [i for i in xrange(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
if tags_start_index > -1:
tags_end_index = [i for i in xrange(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
tags = lines[tags_start_index:tags_end_index + 2]
334 """ we join the tags with separator to store as simple string """
335 info[
"tags"] = self._LINE_SEPARATOR.join(tags)
339 """ get the command line """
342 info[
"command_line"] = lines[cmd_index]
343 except IndexError, e:
346 info[
"command_line"] =
""
info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
except IndexError, e:
info["command_line"] = ""
(("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
(("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
(("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
(("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
(("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
for each of the IgProf_Perf, IgProf_Mem, Memcheck, and Callgrind tests the input file has the following structure:
* beginning ->> and start timestamp (the first one):
Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each
Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
Mon Jun 14 20:06:54 2010
<... whatever might be here, might overlap with other test start/end messages ...>
Mon Jun 14 21:59:33 2010
IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4
* ending - the last timestamp before "... is done running ..."
reSubmit = re.compile(r"""^Let's submit (.+) test on core (\d+)$""")
reStart = re.compile(r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
reWaiting = re.compile(r"""^Waiting for tests to be done...$""")
reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
411 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
for line_index in xrange(0, len(lines)):
    line = lines[line_index]
    if reSubmit.match(line):
        end_index = self.findLineAfter(line_index, lines, test_condition = lambda l: reWaiting.match(l), return_index = True)
        jobs.append(lines[line_index:end_index])
for job_lines in jobs:
if 'auto:' in info['conditions']:
    from Configuration.AlCa.autoCond import autoCond
    info['conditions'] = autoCond[info['conditions'].split(':')[1]].split("::")[0]
if 'FrontierConditions_GlobalTag' in info['conditions']:
    info['conditions'] = info['conditions'].split(",")[1]
steps = job_lines[steps_start + 1:steps_end]
self.handleParsingError("Steps were not found correctly: %s for current job: %s" % (str(steps), str(job_lines)))
441 """ quite nasty - just a work around """
442 print "Trying to recover from this error in case of old cmssw"
444 """ we assume that steps are between the following sentance and a TimeStamp """
448 steps = job_lines[steps_start + 1:steps_end]
self.handleParsingError("EVEN AFTER RECOVERY the steps were not found correctly! : %s for current job: %s" % (str(steps), str(job_lines)))
print "RECOVERY SEEMS to be successful: %s" % str(steps)
info["steps"] = self._LINE_SEPARATOR.join(steps)
start_id_index = self.findLineAfter(0, job_lines, test_condition = reStart.match, return_index = True)
pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
info["testname"] = testName
thread_id_index = self.findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index = True)
thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
info["thread_id"] = thread_id
if not test.has_key(testName):
    test[testName] = []
test[testName].append(info)
for line_index in xrange(0, len(lines)):
    line = lines[line_index]
    if reEnd.match(line):
        testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
line_exitcode = self.findLineBefore(line_index, lines, test_condition = lambda l: reExitCode.match(l))
exit_code, = reExitCode.match(line_exitcode).groups()
print "Error while getting exit code (Other test): %s" % str(e)
for key, thread in test.items():
    for i in range(0, len(thread)):
        if thread[i]["thread_id"] == thread_id:
            thread[i].update({"end": time, "exit_code": exit_code})
494 """ parses the timeSize """
500 the structure of input file:
501 * beginning ->> and start timestamp- the firstone:
502 >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
Candle MinBias will be PROCESSED
You defined your own steps to run:
Written out cmsRelvalreport.py input file at:
/build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
Thu Aug 13 14:53:37 2009 [start]
Thu Aug 13 16:04:48 2009 [end]
Individual cmsRelvalreport.py ExitCode 0
* ending - the last timestamp "... ExitCode ..."
519 """ divide into separate jobs """
523 timesize_start_indicator = re.compile(
r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
for line_index in xrange(0, len(lines)):
    line = lines[line_index]
    if timesize_start_indicator.match(line):
        jobs.append(lines[start:line_index])
jobs.append(lines[start:len(lines)])
(("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
(("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
(("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
(("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
(("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")
553 print "TimeSize (%d) jobs: %s" % (len(jobs), str(jobs))
for job_lines in jobs:
    """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in the parsing rules) """
if 'auto:' in info['conditions']:
    from Configuration.AlCa.autoCond import autoCond
    info['conditions'] = autoCond[info['conditions'].split(':')[1]].split("::")[0]
if 'FrontierConditions_GlobalTag' in info['conditions']:
    info['conditions'] = info['conditions'].split(",")[1]
569 """ the following is not available on one of the releases, instead
570 use the first timestamp available on our job - that's the starting time :) """
end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)
nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()
info["exit_code"] = exit_code
steps = job_lines[steps_start + 1:steps_end]
self.handleParsingError("Steps were not found correctly: %s for current job: %s" % (str(steps), str(job_lines)))
595 """ quite nasty - just a work around """
596 print "Trying to recover from this error in case of old cmssw"
598 """ we assume that steps are between the following sentance and a TimeStamp """
602 steps = job_lines[steps_start + 1:steps_end]
self.handleParsingError("EVEN AFTER RECOVERY the steps were not found correctly! : %s for current job: %s" % (str(steps), str(job_lines)))
print "RECOVERY SEEMS to be successful: %s" % str(steps)
info["steps"] = self._LINE_SEPARATOR.join(steps)
timesize_result.append(info)
return {"TimeSize": timesize_result}
scores = [{"score": self.reCmsScimarkTest.match(line).groups()[1], "type": testType, "core": core}
          for line in lines
          if self.reCmsScimarkTest.match(line)]
score.update({"messurement_number": i})
main_core = main_cores[0]
csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))
reIsCsiMark_notusedcore = re.compile(r"^cmsScimark_(\d+).log$")
scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
                 for f in os.listdir(self._path)
                 if reIsCsiMark_notusedcore.match(f) and os.path.isfile(os.path.join(self._path, f))]
for core_number in scimark_files:
    csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" % str(core_number), core = core_number))
checks whether the suite finished successfully
and whether the tarball was successfully archived and uploaded to CASTOR """
(("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
(("castor_md5",), r"""^The md5 checksum of the tarball: (.+)$"""),
(("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
(("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
(("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
669 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
672 """ did we detect any errors in log files ? """
673 info[
"no_errors_detected"] = [line
for line
in self.
lines_other if line ==
"There were no errors detected in any of the log files!"]
and "1" or "0"
if not info["successfully_archived_tarball"]:
    info["castor_file_url"] = ""
if not info["castor_file_url"]:
    self.handleParsingError("Castor tarball URL not found. Trying to get it from the environment")
lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")
print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s" % url
if os.environ.has_key("PERFDB_CASTOR_FILE_URL"):
    url = os.environ["PERFDB_CASTOR_FILE_URL"]
693 print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"
698 if lmdb_castor_url_is_valid(url):
699 info[
"castor_file_url"] = url
701 print "Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball"
702 if os.isatty(0): url = sys.stdin.readline()
703 else:
raise IOError(
"stdin is closed.")
'''Return the tarball CASTOR location by parsing the cmsPerfSuite.log file'''
print "Getting the url from the cmsPerfSuite.log"
log = open("cmsPerfSuite.log", "r")
castor_dir = "UNKNOWN_CASTOR_DIR"
tarball = "UNKNOWN_TARBALL"
for line in log.readlines():
if 'castordir' in line:
    castor_dir = line.split()[1]
if 'tgz' in line and tarball == "UNKNOWN_TARBALL":
    tarball = os.path.basename(line.split()[2])
castor_tarball = os.path.join(castor_dir, tarball)
return castor_tarball
result = {"General": {}, "TestResults": {}, "cmsSciMark": {}, "unrecognized_jobs": []}
725 """ all the general info - start, arguments, host etc """
728 """ machine info - cpu, memmory """
731 """ we add info about how successfull was the run, when it finished and final castor url to the file! """
734 print "Parsing TimeSize runs..."
739 print "BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " + str(e)
741 print "Parsing Other(IgProf, Memcheck, ...) runs..."
745 print "BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " + str(e)
main_cores = [result["General"]["run_on_cpus"]]
num_cores = result["General"].get("num_cores", 0)
result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)
if __name__ == "__main__":
    from xml.dom import minidom
    import cmssw_exportdb_xml
    path = os.path.abspath(".")
    run_info = p.parseAll()
    xml_doc = minidom.Document()