1 from __future__
import print_function
5 import parsingRulesHelper
7 from commands
import getstatusoutput
11 The whole parsing works as follows. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ): 14 As most of the info is simple one-line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with a tuple of field names for general info which will be filled in. In this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling the simple function _applyParsingRules which checks each line against each rule; if a rule matches, it fills in the result dictionary with the result. 15 Additionally we get the cpu and memory info from /proc/cpuinfo /proc/meminfo 18 We again use much the same technique. But at first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules, also we find the starting and ending times (i.e. We know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:") 21 We find the statement that the test is being launched (containing the test name, core and num events). Above we have the thread number, and below the starting time. 22 The ending time can be ONLY connected with the starting time by the Thread-ID. The problem is that the file names the same test instance differently, like <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>. 26 """ Simple function for error detection. TODO: we could use a list of possible steps also """ 27 return not (
not steps
or len(steps) > self.
_MAX_STEPS)
37 """ some initialisation to speedup the other functions """ 42 """ the separator for beginning of timeSize / end of general statistics """ 43 self.
_timeSizeStart = re.compile(
r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
44 """ (the first timestamp is the start of TimeSize) """ 47 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """ 48 self.
_timeSizeEnd = re.compile(
r"""^Stopping all cmsScimark jobs now$""")
54 ----- READ THE DATA ----- 57 """ split the whole file into parts """ 63 timesize_end= [lines.index(line)
for line
in lines
if self._timeSizeEnd.match(line)]
65 timesize_end_index = timesize_end[0]
68 timesize_start=[lines.index(line)
for line
in lines
if self._timeSizeStart.match(line)]
69 general_stop=[lines.index(line)
for line
in lines
if self._otherStart.match(line)]
71 timesize_start_index = timesize_start[0]
72 general_stop_index = timesize_start_index
74 timesize_start_index=timesize_end_index+1
75 general_stop_index=general_stop[0]
77 timesize_start_index=0
80 """ we split the structure: 83 * all others [igprof etc] 86 """ we get the indexes of spliting """ 93 """ a list of missing fields """ 99 Returns whether the string is a timestamp (if not returns None) 101 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009") 103 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009") 106 datetime_format =
"%a %b %d %H:%M:%S %Y" 108 time.strptime(line, datetime_format)
115 return [job_lines.index(line)
116 for line
in job_lines
117 if line.startswith(start_of_line)][0]
def findLineBefore(self, line_index, lines, test_condition):
    """ Find the first line satisfying `test_condition` coming before `line_index`.

    Parameters:
      line_index     -- index into `lines` before which the scan starts (exclusive)
      lines          -- list of strings to scan
      test_condition -- callable taking a line, truthy when the line matches

    Returns the matching line, or None when no earlier line satisfies the
    condition.
    """
    # walk backwards from the line just above `line_index` down to index 0
    for idx in range(line_index - 1, -1, -1):
        line = lines[idx]
        # first (i.e. nearest preceding) match wins
        if test_condition(line):
            return line
    # NOTE(review): the original not-found behavior is not visible in this
    # garbled source — returning None here; callers wrap this in try/except,
    # so a None (and the resulting downstream error) is tolerated.
    return None
def findLineAfter(self, line_index, lines, test_condition, return_index = False):
    """ Find the first line satisfying `test_condition` coming after `line_index`.

    Parameters:
      line_index     -- index into `lines` after which the scan starts (exclusive)
      lines          -- list of strings to scan
      test_condition -- callable taking a line, truthy when the line matches
      return_index   -- when True, return the index of the matching line
                        instead of the line itself

    Returns the matching line (or its index when `return_index` is True),
    or None when no later line satisfies the condition.
    """
    # walk forward starting just after the given index
    # (range instead of xrange: works identically here and keeps the file
    #  runnable under Python 3, consistent with the print_function import)
    for idx in range(line_index + 1, len(lines)):
        line = lines[idx]
        # first match wins
        if test_condition(line):
            return idx if return_index else line
    # no match after line_index
    return None
142 """ returns the first timestamp BEFORE the line with given index """ 147 """ returns the first timestamp AFTER the line with given index """ 153 raise ValueError(message)
154 print(
" ======== AND ERROR WHILE PARSING METADATA ====")
156 print(
" =============== end ========================= ")
# reads the input cmsPerfSuite.log file
def readInput(self, path, fileName = "cmsPerfSuite.log"):
    """ Read `fileName` under `path` and return its lines.

    Each line is stripped of leading/trailing whitespace. Returns an empty
    list when the file cannot be opened, so callers can proceed with
    "no data" instead of crashing on a missing log file.

    Parameters:
      path     -- directory containing the log file
      fileName -- log file name, defaults to "cmsPerfSuite.log"
    """
    try:
        # `with` guarantees the handle is closed (the visible original
        # opened the file and never closed it)
        with open(os.path.join(path, fileName), "r") as f:
            lines = [s.strip() for s in f.readlines()]
    except IOError:
        # missing or unreadable log file -> behave as if it were empty
        lines = []
    return lines
177 """ Returns the cpu and memory info """ 183 * num_cores = max(core id+1) [it's counted from 0] 184 * 'model name' is processor type [we will return only the first one - we assume others to be same!!?? 185 * cpu MHz - is the speed of CPU 190 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz 196 f= open(os.path.join(self.
_path,
"cpuinfo"),
"r") 199 cpu_attributes = [l.strip().
split(
":")
for l
in f.readlines()]
203 "num_cores": max ([
int(attr[1].
strip())+1
for attr
in cpu_attributes
if attr[0].
strip() ==
"processor"]),
204 "cpu_speed_MHZ": max ([attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"cpu MHz"]),
205 "cpu_cache_size": [attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"cache size"][0],
206 "cpu_model_name": [attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"model name"][0]
219 f= open(os.path.join(self.
_path,
"meminfo"),
"r") 222 mem_attributes = [l.strip().
split(
":")
for l
in f.readlines()]
225 "memory_total_ram": [attr[1].
strip()
for attr
in mem_attributes
if attr[0].
strip() ==
"MemTotal"][0]
231 cpu_result.update(mem_result)
238 Applies the (provided) regular expression rules (=rule[1] for rule in parsing_rules) 239 to each line and if it matches the line, 240 puts the mached information to the dictionary as the specified keys (=rule[0]) which is later returned 241 Rule[3] contains whether the field is required to be found. If so and it isn't found the exception would be raised. 243 ( (field_name_1_to_match, field_name_2), regular expression, /optionaly: is the field required? if so "req"/ ) 246 """ we call a shared parsing helper """ 251 self.missing_fields.extend(missing_fields)
258 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the 259 regexp while the second one is the regexp itself """ 262 ((
"",
"num_cores",
"run_on_cpus"),
r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
263 ((
"start_time",
"host",
"local_workdir",
"user"),
r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""",
"req"),
264 ((
"architecture",) ,
r"""^Current Architecture is (.+)$"""),
265 ((
"test_release_based_on",),
r"""^Test Release based on: (.+)$""",
"req"),
266 ((
"base_release_path",) ,
r"""^Base Release in: (.+)$"""),
267 ((
"test_release_local_path",) ,
r"""^Your Test release in: (.+)$"""),
269 ((
"castor_dir",) ,
r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),
271 ((
"TimeSize_events",) ,
r"""^(\d+) TimeSize events$"""),
272 ((
"IgProf_events",) ,
r"""^(\d+) IgProf events$"""),
273 ((
"CallGrind_events",) ,
r"""^(\d+) Callgrind events$"""),
274 ((
"Memcheck_events",) ,
r"""^(\d+) Memcheck events$"""),
276 ((
"candles_TimeSize",) ,
r"""^TimeSizeCandles \[(.*)\]$"""),
277 ((
"candles_TimeSizePU",) ,
r"""^TimeSizePUCandles \[(.*)\]$"""),
279 ((
"candles_Memcheck",) ,
r"""^MemcheckCandles \[(.*)\]$"""),
280 ((
"candles_MemcheckPU",) ,
r"""^MemcheckPUCandles \[(.*)\]$"""),
282 ((
"candles_Callgrind",) ,
r"""^CallgrindCandles \[(.*)\]$"""),
283 ((
"candles_CallgrindPU",) ,
r"""^CallgrindPUCandles \[(.*)\]$"""),
285 ((
"candles_IgProfPU",) ,
r"""^IgProfPUCandles \[(.*)\]$"""),
286 ((
"candles_IgProf",) ,
r"""^IgProfCandles \[(.*)\]$"""),
289 ((
"cmsScimark_before",) ,
r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
290 ((
"cmsScimark_after",) ,
r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
291 ((
"cmsDriverOptions",) ,
r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),
293 ((
"HEPSPEC06_SCORE",) ,
r"""^This machine's HEPSPEC06 score is: (.+)$"""),
297 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 301 """ postprocess the candles list """ 303 for field, value
in info.items():
304 if field.startswith(
"candles_"):
305 test = field.replace(
"candles_",
"")
306 value = [v.strip(
" '")
for v
in value.split(
",")]
311 info[
"candles"] = self._LINE_SEPARATOR.join([k+
":"+
",".
join(v)
for (k, v)
in candles.items()])
316 --- Tag --- --- RelTag --- -------- Package -------- 317 HEAD V05-03-06 IgTools/IgProf 318 V01-06-05 V01-06-04 Validation/Performance 319 --------------------------------------- 320 total packages: 2 (2 displayed) 322 tags_start_index = -1
324 tags_start_index = [i
for i
in xrange(0, len(lines))
if lines[i].startswith(
"--- Tag ---")][0]
327 if tags_start_index > -1:
328 tags_end_index = [i
for i
in xrange(tags_start_index + 1, len(lines))
if lines[i].startswith(
"---------------------------------------")][0]
330 tags = lines[tags_start_index:tags_end_index+2]
335 """ we join the tags with separator to store as simple string """ 336 info[
"tags"] = self._LINE_SEPARATOR.join(tags)
340 """ get the command line """ 343 info[
"command_line"] = lines[cmd_index]
344 except IndexError
as e:
347 info[
"command_line"] =
"" 352 info[
"command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
353 except IndexError
as e:
356 info[
"command_line"] =
"" 366 ((
"",
"candle", ),
r"""^(Candle|ONLY) (.+) will be PROCESSED$""",
"req"),
368 ((
"cms_driver_options", ),
r"""^Using user-specified cmsDriver.py options: (.+)$"""),
369 ((
"",
"conditions",
""),
r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""",
"req"),
371 ((
"",
"pileup_type",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
373 ((
"",
"event_content",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""",
"req"),
382 for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file: 383 * beginning ->> and start timestamp- the firstone: 384 Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each 385 Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads 386 Mon Jun 14 20:06:54 2010 388 <... whatever might be here, might overlap with other test start/end messages ..> 390 Mon Jun 14 21:59:33 2010 391 IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4 393 * ending - the last timestamp "before is done running ...." 400 reSubmit = re.compile(
r"""^Let's submit (.+) test on core (\d+)$""")
402 reStart = re.compile(
r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
405 reEnd = re.compile(
r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
407 reAddThread = re.compile(
r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
409 reWaiting = re.compile(
r"""^Waiting for tests to be done...$""")
411 reExitCode = re.compile(
r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
412 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """ 417 for line_index
in xrange(0, len(lines)):
418 line = lines[line_index]
419 if reSubmit.match(line):
420 end_index = self.
findLineAfter(line_index, lines, test_condition=
lambda l: reWaiting.match(l), return_index =
True)
421 jobs.append(lines[line_index:end_index])
423 for job_lines
in jobs:
428 if 'auto:' in info[
'conditions']:
430 info[
'conditions'] = autoCond[ info[
'conditions'].
split(
':')[1] ].
split(
"::")[0]
432 if 'FrontierConditions_GlobalTag' in info[
'conditions']:
433 info[
'conditions']=info[
'conditions'].
split(
",")[1]
438 steps = job_lines[steps_start + 1:steps_end]
442 """ quite nasty - just a work around """ 443 print(
"Trying to recover from this error in case of old cmssw")
445 """ we assume that steps are between the following sentance and a TimeStamp """ 449 steps = job_lines[steps_start + 1:steps_end]
451 self.
handleParsingError(
"EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (
str(steps),
str(job_lines)))
453 print(
"RECOVERY SEEMS to be successful: %s" %
str(steps))
455 info[
"steps"] = self._LINE_SEPARATOR.join(steps)
457 start_id_index = self.
findLineAfter(0, job_lines, test_condition = reStart.match, return_index =
True)
458 pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
459 info[
"testname"] = testName
461 thread_id_index = self.
findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index =
True)
464 thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
465 info[
"thread_id"] = thread_id
467 if testName
not in test:
469 test[testName].
append(info)
471 for line_index
in xrange(0, len(lines)):
472 line = lines[line_index]
474 if reEnd.match(line):
475 testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
480 line_exitcode = self.
findLineBefore(line_index, lines, test_condition=
lambda l: reExitCode.match(l))
481 exit_code, = reExitCode.match(line_exitcode).groups()
482 except Exception
as e:
483 print(
"Error while getting exit code (Other test): %s" +
str(e))
485 for key, thread
in test.items():
486 for i
in range(0, len(thread)):
487 if thread[i][
"thread_id"] == thread_id:
488 thread[i].
update({
"end": time,
"exit_code": exit_code})
495 """ parses the timeSize """ 501 the structure of input file: 502 * beginning ->> and start timestamp- the firstone: 503 >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root] 505 Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM 506 Candle MinBias will be PROCESSED 507 You defined your own steps to run: 510 Written out cmsRelvalreport.py input file at: 511 /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt 512 Thu Aug 13 14:53:37 2009 [start] 514 Thu Aug 13 16:04:48 2009 [end] 515 Individual cmsRelvalreport.py ExitCode 0 516 * ending - the last timestamp "... ExitCode ...." 520 """ divide into separate jobs """ 524 timesize_start_indicator = re.compile(
r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
525 for line_index
in xrange(0, len(lines)):
526 line = lines[line_index]
528 if timesize_start_indicator.match(line):
530 jobs.append(lines[start:line_index])
533 jobs.append(lines[start:len(lines)])
537 ((
"",
"candle", ),
r"""^(Candle|ONLY) (.+) will be PROCESSED$""",
"req"),
539 ((
"cms_driver_options", ),
r"""^Using user-specified cmsDriver.py options: (.+)$"""),
540 ((
"",
"conditions",
""),
r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""",
"req"),
542 ((
"",
"pileup_type",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
544 ((
"",
"event_content",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""",
"req"),
551 reExit_code = re.compile(
r"""Individual ([^\s]+) ExitCode (\d+)""")
554 print(
"TimeSize (%d) jobs: %s" % (len(jobs),
str(jobs)))
556 for job_lines
in jobs:
557 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 560 if 'auto:' in info[
'conditions']:
562 info[
'conditions'] = autoCond[ info[
'conditions'].
split(
':')[1] ].
split(
"::")[0]
564 if 'FrontierConditions_GlobalTag' in info[
'conditions']:
565 info[
'conditions']=info[
'conditions'].
split(
",")[1]
570 """ the following is not available on one of the releases, instead 571 use the first timestamp available on our job - that's the starting time :) """ 581 end_time_before = self.
findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index =
True)
584 nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()
587 info[
"exit_code"] = exit_code
592 steps = job_lines[steps_start + 1:steps_end]
596 """ quite nasty - just a work around """ 597 print(
"Trying to recover from this error in case of old cmssw")
599 """ we assume that steps are between the following sentance and a TimeStamp """ 603 steps = job_lines[steps_start + 1:steps_end]
605 self.
handleParsingError(
"EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (
str(steps),
str(job_lines)))
607 print(
"RECOVERY SEEMS to be successful: %s" %
str(steps))
609 info[
"steps"] = self._LINE_SEPARATOR.join(steps)
612 timesize_result.append(info)
613 return {
"TimeSize": timesize_result}
620 scores = [{
"score": self.reCmsScimarkTest.match(line).groups()[1],
"type": testType,
"core": core}
622 if self.reCmsScimarkTest.match(line)]
627 score.update({
"messurement_number": i})
631 main_core = main_cores[0]
635 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark2", testType =
"mainCore", core = main_core))
636 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark2_large", testType =
"mainCore_Large", core = main_core))
640 reIsCsiMark_notusedcore = re.compile(
"^cmsScimark_(\d+).log$")
641 scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
642 for f
in os.listdir(self.
_path)
643 if reIsCsiMark_notusedcore.match(f)
644 and os.path.isfile(os.path.join(self.
_path, f)) ]
646 for core_number
in scimark_files:
648 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark_%s" %
str(core_number), testType =
"NotUsedCore_%s" %
str(core_number), core = core_number))
657 checks if the suite has successfully finished 658 and if the tarball was successfully archived and uploaded to the castor """ 661 ((
"finishing_time",
"",
""),
r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
662 ((
"castor_md5",) ,
r"""^The md5 checksum of the tarball: (.+)$"""),
663 ((
"successfully_archived_tarball", ),
r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
665 ((
"castor_file_url",),
r"""^The tarball can be found: (.+)$"""),
666 ((
"castor_logfile_url",),
r"""^The logfile can be found: (.+)$"""),
670 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 673 """ did we detect any errors in log files ? """ 674 info[
"no_errors_detected"] = [line
for line
in self.
lines_other if line ==
"There were no errors detected in any of the log files!"]
and "1" or "0" 675 if not info[
"successfully_archived_tarball"]:
676 info[
"castor_file_url"] =
"" 678 if not info[
"castor_file_url"]:
680 self.
handleParsingError(
"Castor tarball URL not found. Trying to get from environment")
681 lmdb_castor_url_is_valid =
lambda url: url.startswith(
"/castor/")
687 print(
"Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url)
690 if "PERFDB_CASTOR_FILE_URL" in os.environ:
691 url = os.environ[
"PERFDB_CASTOR_FILE_URL"]
694 print(
"Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL")
699 if lmdb_castor_url_is_valid(url):
700 info[
"castor_file_url"] = url
702 print(
"Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball")
703 if os.isatty(0): url = sys.stdin.readline()
704 else:
raise IOError(
"stdin is closed.")
709 '''Return the tarball castor location by parsing the cmsPerfSuite.log file''' 710 print(
"Getting the url from the cmsPerfSuite.log")
711 log=open(
"cmsPerfSuite.log",
"r") 712 castor_dir="UNKNOWN_CASTOR_DIR" 713 tarball=
"UNKNOWN_TARBALL" 714 for line
in log.readlines():
715 if 'castordir' in line:
716 castor_dir=line.split()[1]
717 if 'tgz' in line
and tarball==
"UNKNOWN_TARBALL":
719 tarball=os.path.basename(line.split()[2])
720 castor_tarball=os.path.join(castor_dir,tarball)
721 return castor_tarball
724 result = {
"General": {},
"TestResults":{},
"cmsSciMark":{},
'unrecognized_jobs': []}
726 """ all the general info - start, arguments, host etc """ 729 """ machine info - cpu, memmory """ 732 """ we add info about how successfull was the run, when it finished and final castor url to the file! """ 735 print(
"Parsing TimeSize runs...")
739 except Exception
as e:
740 print(
"BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " +
str(e))
742 print(
"Parsing Other(IgProf, Memcheck, ...) runs...")
745 except Exception
as e:
746 print(
"BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " +
str(e))
751 main_cores = [result[
"General"][
"run_on_cpus"]]
752 num_cores = result[
"General"].
get(
"num_cores", 0)
759 result[
"cmsSciMark"] = self.
readCmsScimark(main_cores = main_cores)
768 if __name__ ==
"__main__":
769 from xml.dom
import minidom
770 import cmssw_exportdb_xml
776 path = os.path.abspath(
".")
779 run_info = p.parseAll()
786 xml_doc = minidom.Document()
S & print(S &os, JobReport::InputFile const &f)
static std::string join(char **cmd)
def exportRunInfo(xml_doc, run_info, release=None, print_out=False)
def rulesParser(parsing_rules, lines, compileRules=True)
T get(const Candidate &c)