1 from __future__
import print_function
2 from __future__
import absolute_import
3 from builtins
import range
7 from .
import parsingRulesHelper
9 from commands
import getstatusoutput
13 The whole parsing works as follows. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ): 16 As most of the info is simple one-line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with tuple of field names for general info which will be filled in. in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling simple function _applyParsingRules which checks each line against each rule; if one matches, it fills in the result dictionary with the result. 17 Additionally we get the cpu and memory info from /proc/cpuinfo /proc/meminfo 20 We use the same technique a little bit also. But at first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules, also we find the starting and ending times (i.e. We know that start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:") 23 We find the statement that the test is being launched (containing the test name, core and num events). Above we have the thread number, and below the starting time. 24 The ending time can be ONLY connected with the starting time by the Thread-ID. The problem is that the file names the same test instance differently, like <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>. 28 """ Simple function for error detection. TODO: we could use a list of possible steps also """ 29 return not (
not steps
or len(steps) > self.
_MAX_STEPS)
39 """ some initialisation to speedup the other functions """ 44 """ the separator for beginning of timeSize / end of general statistics """ 45 self.
_timeSizeStart = re.compile(
r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
46 """ (the first timestamp is the start of TimeSize) """ 49 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """ 50 self.
_timeSizeEnd = re.compile(
r"""^Stopping all cmsScimark jobs now$""")
56 ----- READ THE DATA ----- 59 """ split the whole file into parts """ 65 timesize_end= [lines.index(line)
for line
in lines
if self._timeSizeEnd.match(line)]
67 timesize_end_index = timesize_end[0]
70 timesize_start=[lines.index(line)
for line
in lines
if self._timeSizeStart.match(line)]
71 general_stop=[lines.index(line)
for line
in lines
if self._otherStart.match(line)]
73 timesize_start_index = timesize_start[0]
74 general_stop_index = timesize_start_index
76 timesize_start_index=timesize_end_index+1
77 general_stop_index=general_stop[0]
79 timesize_start_index=0
82 """ we split the structure: 85 * all others [igprof etc] 88 """ we get the indexes of spliting """ 95 """ a list of missing fields """ 101 Returns whether the string is a timestamp (if not returns None) 103 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009") 105 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009") 108 datetime_format =
"%a %b %d %H:%M:%S %Y" 110 time.strptime(line, datetime_format)
117 return [job_lines.index(line)
118 for line
in job_lines
119 if line.startswith(start_of_line)][0]
122 """ finds a line satisfying the `test_condition` coming before the `line_index` """ 124 for line_index
in range(line_index -1, -1, -1):
125 line = lines[line_index]
127 if test_condition(line):
def findLineAfter(self, line_index, lines, test_condition, return_index=False):
    """Find the first line satisfying `test_condition` coming AFTER `line_index`.

    Scans `lines` forward starting at `line_index + 1`.

    Parameters:
        line_index     -- index to start the forward search after
        lines          -- list of strings to scan
        test_condition -- callable taking a line, truthy when the line matches
        return_index   -- when True, return the matching line's index instead
                          of the line itself (callers use this to slice/index
                          back into `lines`)

    Returns the matching line (or its index), or None when nothing matches.
    """
    # use a distinct loop variable: the original reused `line_index`,
    # shadowing the parameter
    for idx in range(line_index + 1, len(lines)):
        line = lines[idx]
        # stop at the first line that satisfies the condition
        if test_condition(line):
            if return_index:
                return idx
            return line
144 """ returns the first timestamp BEFORE the line with given index """ 149 """ returns the first timestamp AFTER the line with given index """ 155 raise ValueError(message)
156 print(
" ======== AND ERROR WHILE PARSING METADATA ====")
158 print(
" =============== end ========================= ")
def readInput(self, path, fileName="cmsPerfSuite.log"):
    """Read the input cmsPerfSuite.log file and return its lines.

    Parameters:
        path     -- directory containing the log file
        fileName -- log file name (defaults to "cmsPerfSuite.log")

    Returns a list of lines with surrounding whitespace stripped;
    an empty list when the file cannot be opened.
    """
    try:
        # context manager guarantees the handle is closed — the original
        # opened the file and never closed it
        with open(os.path.join(path, fileName), "r") as f:
            lines = [s.strip() for s in f]
    except IOError:
        # missing/unreadable log: let the parser proceed with no input
        lines = []
    return lines
179 """ Returns the cpu and memory info """ 185 * num_cores = max(core id+1) [it's counted from 0] 186 * 'model name' is processor type [we will return only the first one - we assume others to be same!!?? 187 * cpu MHz - is the speed of CPU 192 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz 198 f= open(os.path.join(self.
_path,
"cpuinfo"),
"r") 201 cpu_attributes = [l.strip().
split(
":")
for l
in f.readlines()]
205 "num_cores": max ([
int(attr[1].
strip())+1
for attr
in cpu_attributes
if attr[0].
strip() ==
"processor"]),
206 "cpu_speed_MHZ": max ([attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"cpu MHz"]),
207 "cpu_cache_size": [attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"cache size"][0],
208 "cpu_model_name": [attr[1].
strip()
for attr
in cpu_attributes
if attr[0].
strip() ==
"model name"][0]
221 f= open(os.path.join(self.
_path,
"meminfo"),
"r") 224 mem_attributes = [l.strip().
split(
":")
for l
in f.readlines()]
227 "memory_total_ram": [attr[1].
strip()
for attr
in mem_attributes
if attr[0].
strip() ==
"MemTotal"][0]
233 cpu_result.update(mem_result)
240 Applies the (provided) regular expression rules (=rule[1] for rule in parsing_rules) 241 to each line and if it matches the line, 242 puts the mached information to the dictionary as the specified keys (=rule[0]) which is later returned 243 Rule[3] contains whether the field is required to be found. If so and it isn't found the exception would be raised. 245 ( (field_name_1_to_match, field_name_2), regular expression, /optionaly: is the field required? if so "req"/ ) 248 """ we call a shared parsing helper """ 253 self.missing_fields.extend(missing_fields)
260 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the 261 regexp while the second one is the regexp itself """ 264 ((
"",
"num_cores",
"run_on_cpus"),
r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
265 ((
"start_time",
"host",
"local_workdir",
"user"),
r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""",
"req"),
266 ((
"architecture",) ,
r"""^Current Architecture is (.+)$"""),
267 ((
"test_release_based_on",),
r"""^Test Release based on: (.+)$""",
"req"),
268 ((
"base_release_path",) ,
r"""^Base Release in: (.+)$"""),
269 ((
"test_release_local_path",) ,
r"""^Your Test release in: (.+)$"""),
271 ((
"castor_dir",) ,
r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),
273 ((
"TimeSize_events",) ,
r"""^(\d+) TimeSize events$"""),
274 ((
"IgProf_events",) ,
r"""^(\d+) IgProf events$"""),
275 ((
"CallGrind_events",) ,
r"""^(\d+) Callgrind events$"""),
276 ((
"Memcheck_events",) ,
r"""^(\d+) Memcheck events$"""),
278 ((
"candles_TimeSize",) ,
r"""^TimeSizeCandles \[(.*)\]$"""),
279 ((
"candles_TimeSizePU",) ,
r"""^TimeSizePUCandles \[(.*)\]$"""),
281 ((
"candles_Memcheck",) ,
r"""^MemcheckCandles \[(.*)\]$"""),
282 ((
"candles_MemcheckPU",) ,
r"""^MemcheckPUCandles \[(.*)\]$"""),
284 ((
"candles_Callgrind",) ,
r"""^CallgrindCandles \[(.*)\]$"""),
285 ((
"candles_CallgrindPU",) ,
r"""^CallgrindPUCandles \[(.*)\]$"""),
287 ((
"candles_IgProfPU",) ,
r"""^IgProfPUCandles \[(.*)\]$"""),
288 ((
"candles_IgProf",) ,
r"""^IgProfCandles \[(.*)\]$"""),
291 ((
"cmsScimark_before",) ,
r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
292 ((
"cmsScimark_after",) ,
r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
293 ((
"cmsDriverOptions",) ,
r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),
295 ((
"HEPSPEC06_SCORE",) ,
r"""^This machine's HEPSPEC06 score is: (.+)$"""),
299 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 303 """ postprocess the candles list """ 305 for field, value
in info.items():
306 if field.startswith(
"candles_"):
307 test = field.replace(
"candles_",
"")
308 value = [v.strip(
" '")
for v
in value.split(
",")]
313 info[
"candles"] = self._LINE_SEPARATOR.join([k+
":"+
",".
join(v)
for (k, v)
in candles.items()])
318 --- Tag --- --- RelTag --- -------- Package -------- 319 HEAD V05-03-06 IgTools/IgProf 320 V01-06-05 V01-06-04 Validation/Performance 321 --------------------------------------- 322 total packages: 2 (2 displayed) 324 tags_start_index = -1
326 tags_start_index = [i
for i
in range(0, len(lines))
if lines[i].startswith(
"--- Tag ---")][0]
329 if tags_start_index > -1:
330 tags_end_index = [i
for i
in range(tags_start_index + 1, len(lines))
if lines[i].startswith(
"---------------------------------------")][0]
332 tags = lines[tags_start_index:tags_end_index+2]
337 """ we join the tags with separator to store as simple string """ 338 info[
"tags"] = self._LINE_SEPARATOR.join(tags)
342 """ get the command line """ 345 info[
"command_line"] = lines[cmd_index]
346 except IndexError
as e:
349 info[
"command_line"] =
"" 354 info[
"command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
355 except IndexError
as e:
358 info[
"command_line"] =
"" 368 ((
"",
"candle", ),
r"""^(Candle|ONLY) (.+) will be PROCESSED$""",
"req"),
370 ((
"cms_driver_options", ),
r"""^Using user-specified cmsDriver.py options: (.+)$"""),
371 ((
"",
"conditions",
""),
r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""",
"req"),
373 ((
"",
"pileup_type",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
375 ((
"",
"event_content",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""",
"req"),
384 for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file: 385 * beginning ->> and start timestamp- the firstone: 386 Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each 387 Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads 388 Mon Jun 14 20:06:54 2010 390 <... whatever might be here, might overlap with other test start/end messages ..> 392 Mon Jun 14 21:59:33 2010 393 IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4 395 * ending - the last timestamp "before is done running ...." 402 reSubmit = re.compile(
r"""^Let's submit (.+) test on core (\d+)$""")
404 reStart = re.compile(
r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
407 reEnd = re.compile(
r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
409 reAddThread = re.compile(
r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
411 reWaiting = re.compile(
r"""^Waiting for tests to be done...$""")
413 reExitCode = re.compile(
r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
414 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """ 419 for line_index
in range(0, len(lines)):
420 line = lines[line_index]
421 if reSubmit.match(line):
422 end_index = self.
findLineAfter(line_index, lines, test_condition=
lambda l: reWaiting.match(l), return_index =
True)
423 jobs.append(lines[line_index:end_index])
425 for job_lines
in jobs:
430 if 'auto:' in info[
'conditions']:
432 info[
'conditions'] = autoCond[ info[
'conditions'].
split(
':')[1] ].
split(
"::")[0]
434 if 'FrontierConditions_GlobalTag' in info[
'conditions']:
435 info[
'conditions']=info[
'conditions'].
split(
",")[1]
440 steps = job_lines[steps_start + 1:steps_end]
444 """ quite nasty - just a work around """ 445 print(
"Trying to recover from this error in case of old cmssw")
447 """ we assume that steps are between the following sentence and a TimeStamp """ 451 steps = job_lines[steps_start + 1:steps_end]
453 self.
handleParsingError(
"EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (
str(steps),
str(job_lines)))
455 print(
"RECOVERY SEEMS to be successful: %s" %
str(steps))
457 info[
"steps"] = self._LINE_SEPARATOR.join(steps)
459 start_id_index = self.
findLineAfter(0, job_lines, test_condition = reStart.match, return_index =
True)
460 pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
461 info[
"testname"] = testName
463 thread_id_index = self.
findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index =
True)
466 thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
467 info[
"thread_id"] = thread_id
469 if testName
not in test:
471 test[testName].
append(info)
473 for line_index
in range(0, len(lines)):
474 line = lines[line_index]
476 if reEnd.match(line):
477 testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
482 line_exitcode = self.
findLineBefore(line_index, lines, test_condition=
lambda l: reExitCode.match(l))
483 exit_code, = reExitCode.match(line_exitcode).groups()
484 except Exception
as e:
485 print(
"Error while getting exit code (Other test): %s" +
str(e))
487 for key, thread
in test.items():
488 for i
in range(0, len(thread)):
489 if thread[i][
"thread_id"] == thread_id:
490 thread[i].
update({
"end": time,
"exit_code": exit_code})
497 """ parses the timeSize """ 503 the structure of input file: 504 * beginning ->> and start timestamp- the firstone: 505 >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root] 507 Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM 508 Candle MinBias will be PROCESSED 509 You defined your own steps to run: 512 Written out cmsRelvalreport.py input file at: 513 /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt 514 Thu Aug 13 14:53:37 2009 [start] 516 Thu Aug 13 16:04:48 2009 [end] 517 Individual cmsRelvalreport.py ExitCode 0 518 * ending - the last timestamp "... ExitCode ...." 522 """ divide into separate jobs """ 526 timesize_start_indicator = re.compile(
r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
527 for line_index
in range(0, len(lines)):
528 line = lines[line_index]
530 if timesize_start_indicator.match(line):
532 jobs.append(lines[start:line_index])
535 jobs.append(lines[start:len(lines)])
539 ((
"",
"candle", ),
r"""^(Candle|ONLY) (.+) will be PROCESSED$""",
"req"),
541 ((
"cms_driver_options", ),
r"""^Using user-specified cmsDriver.py options: (.+)$"""),
542 ((
"",
"conditions",
""),
r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""",
"req"),
544 ((
"",
"pileup_type",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
546 ((
"",
"event_content",
""),
r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""",
"req"),
553 reExit_code = re.compile(
r"""Individual ([^\s]+) ExitCode (\d+)""")
556 print(
"TimeSize (%d) jobs: %s" % (len(jobs),
str(jobs)))
558 for job_lines
in jobs:
559 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 562 if 'auto:' in info[
'conditions']:
564 info[
'conditions'] = autoCond[ info[
'conditions'].
split(
':')[1] ].
split(
"::")[0]
566 if 'FrontierConditions_GlobalTag' in info[
'conditions']:
567 info[
'conditions']=info[
'conditions'].
split(
",")[1]
572 """ the following is not available on one of the releases, instead 573 use the first timestamp available on our job - that's the starting time :) """ 583 end_time_before = self.
findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index =
True)
586 nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()
589 info[
"exit_code"] = exit_code
594 steps = job_lines[steps_start + 1:steps_end]
598 """ quite nasty - just a work around """ 599 print(
"Trying to recover from this error in case of old cmssw")
601 """ we assume that steps are between the following sentence and a TimeStamp """ 605 steps = job_lines[steps_start + 1:steps_end]
607 self.
handleParsingError(
"EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (
str(steps),
str(job_lines)))
609 print(
"RECOVERY SEEMS to be successful: %s" %
str(steps))
611 info[
"steps"] = self._LINE_SEPARATOR.join(steps)
614 timesize_result.append(info)
615 return {
"TimeSize": timesize_result}
622 scores = [{
"score": self.reCmsScimarkTest.match(line).groups()[1],
"type": testType,
"core": core}
624 if self.reCmsScimarkTest.match(line)]
629 score.update({
"messurement_number": i})
633 main_core = main_cores[0]
637 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark2", testType =
"mainCore", core = main_core))
638 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark2_large", testType =
"mainCore_Large", core = main_core))
642 reIsCsiMark_notusedcore = re.compile(
"^cmsScimark_(\d+).log$")
643 scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
644 for f
in os.listdir(self.
_path)
645 if reIsCsiMark_notusedcore.match(f)
646 and os.path.isfile(os.path.join(self.
_path, f)) ]
648 for core_number
in scimark_files:
650 csimark.extend(self.
readCmsScimarkTest(testName =
"cmsScimark_%s" %
str(core_number), testType =
"NotUsedCore_%s" %
str(core_number), core = core_number))
659 checks if the suite has successfully finished 660 and if the tarball was successfully archived and uploaded to the castor """ 663 ((
"finishing_time",
"",
""),
r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
664 ((
"castor_md5",) ,
r"""^The md5 checksum of the tarball: (.+)$"""),
665 ((
"successfully_archived_tarball", ),
r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
667 ((
"castor_file_url",),
r"""^The tarball can be found: (.+)$"""),
668 ((
"castor_logfile_url",),
r"""^The logfile can be found: (.+)$"""),
672 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """ 675 """ did we detect any errors in log files ? """ 676 info[
"no_errors_detected"] = [line
for line
in self.
lines_other if line ==
"There were no errors detected in any of the log files!"]
and "1" or "0" 677 if not info[
"successfully_archived_tarball"]:
678 info[
"castor_file_url"] =
"" 680 if not info[
"castor_file_url"]:
682 self.
handleParsingError(
"Castor tarball URL not found. Trying to get from environment")
683 lmdb_castor_url_is_valid =
lambda url: url.startswith(
"/castor/")
689 print(
"Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url)
692 if "PERFDB_CASTOR_FILE_URL" in os.environ:
693 url = os.environ[
"PERFDB_CASTOR_FILE_URL"]
696 print(
"Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL")
701 if lmdb_castor_url_is_valid(url):
702 info[
"castor_file_url"] = url
704 print(
"Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball")
705 if os.isatty(0): url = sys.stdin.readline()
706 else:
raise IOError(
"stdin is closed.")
711 '''Return the tarball castor location by parsing the cmsPerfSuite.log file''' 712 print(
"Getting the url from the cmsPerfSuite.log")
713 log=open(
"cmsPerfSuite.log",
"r") 714 castor_dir="UNKNOWN_CASTOR_DIR" 715 tarball=
"UNKNOWN_TARBALL" 716 for line
in log.readlines():
717 if 'castordir' in line:
718 castor_dir=line.split()[1]
719 if 'tgz' in line
and tarball==
"UNKNOWN_TARBALL":
721 tarball=os.path.basename(line.split()[2])
722 castor_tarball=os.path.join(castor_dir,tarball)
723 return castor_tarball
726 result = {
"General": {},
"TestResults":{},
"cmsSciMark":{},
'unrecognized_jobs': []}
728 """ all the general info - start, arguments, host etc """ 731 """ machine info - cpu, memmory """ 734 """ we add info about how successfull was the run, when it finished and final castor url to the file! """ 737 print(
"Parsing TimeSize runs...")
741 except Exception
as e:
742 print(
"BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " +
str(e))
744 print(
"Parsing Other(IgProf, Memcheck, ...) runs...")
747 except Exception
as e:
748 print(
"BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " +
str(e))
753 main_cores = [result[
"General"][
"run_on_cpus"]]
754 num_cores = result[
"General"].
get(
"num_cores", 0)
761 result[
"cmsSciMark"] = self.
readCmsScimark(main_cores = main_cores)
770 if __name__ ==
"__main__":
771 from xml.dom
import minidom
772 from .
import cmssw_exportdb_xml
778 path = os.path.abspath(
".")
781 run_info = p.parseAll()
788 xml_doc = minidom.Document()
S & print(S &os, JobReport::InputFile const &f)
static std::string join(char **cmd)
def exportRunInfo(xml_doc, run_info, release=None, print_out=False)
def rulesParser(parsing_rules, lines, compileRules=True)
T get(const Candidate &c)