import re
import os, sys
import time
import glob
import parsingRulesHelper
from commands import getstatusoutput
class parserPerfsuiteMetadata:
    """
    The whole parsing works as follows. We split the file into 3 parts (we keep 3 lists of lines:
    self.lines_general, self.lines_timesize, self.lines_other):

        * General info
    As most of the information items are simple one-line strings, we define a regular expression
    matching each of those lines. Each regular expression is associated with the data we can get
    from it, e.g. ^Suite started at (.+) on (.+) by user (.+)$ matches only the line stating when
    the suite started and on which machine, and is associated with the tuple of general-info field
    names to be filled in. In this way we get
    info = {'start_time': <start taken from regexp>, 'host': <host>, 'user': <user>}.
    This is done by calling the simple function _applyParsingRules, which checks each line against
    each rule and, whenever a rule matches a line, fills in the result dictionary accordingly.
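    For example, a single rule and its effect (an illustrative sketch; the rule format is the one
    described at _applyParsingRules below, the matched line is a made-up sample):

        rule = (("start_time", "host", "user"), r"^Suite started at (.+) on (.+) by user (.+)$")
        # line = "Suite started at <some timestamp> on <some host> by user <some user>"
        # -> info = {"start_time": "<some timestamp>", "host": "<some host>", "user": "<some user>"}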
    Additionally we get the cpu and memory info from /proc/cpuinfo and /proc/meminfo.

        * TimeSize tests
    We use much the same technique, but first we divide the TimeSize lines by job (an individual
    run of cmssw - per candle, and with/without pileup). Then for each of the jobs we apply our
    parsing rules and also find the start and end times (e.g. we know that the start timestamp is
    somewhere after the line containing "Written out cmsRelvalreport.py input file at:").

        * All other tests
    We find the statement that a test is being launched (containing the test name, core and number
    of events). Above it we have the thread number, and below it the start time.
    The end time can ONLY be connected with the start time through the thread ID. The problem is
    that the file names the same test instance differently, e.g. <Launching "PILE UP Memcheck">
    versus <"Memcheck" stopped>.
    """
25 """ Simple function for error detection. TODO: we could use a list of possible steps also """
26 return not (
not steps
or len(steps) > self.
_MAX_STEPS)
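    # Illustrative usage (assuming self._MAX_STEPS >= 2, see __init__):
    #   validateSteps([]) -> False (no steps found)
    #   validateSteps(["GEN,SIM", "DIGI"]) -> True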
36 """ some initialisation to speedup the other functions """
41 """ the separator for beginning of timeSize / end of general statistics """
42 self.
_timeSizeStart = re.compile(
r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
43 """ (the first timestamp is the start of TimeSize) """
46 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """
47 self.
_timeSizeEnd = re.compile(
r"""^Stopping all cmsScimark jobs now$""")
        """
        ----- READ THE DATA -----
        """
        lines = self.readInput(path)
56 """ split the whole file into parts """
62 timesize_end= [lines.index(line)
for line
in lines
if self._timeSizeEnd.match(line)]
64 timesize_end_index = timesize_end[0]
        timesize_start = [i for i, line in enumerate(lines)
            if self._timeSizeStart.match(line)]
        general_stop = [i for i, line in enumerate(lines)
            if self._otherStart.match(line)]
        if timesize_start:
            timesize_start_index = timesize_start[0]
            general_stop_index = timesize_start_index
        elif general_stop:
            timesize_start_index = 0
            general_stop_index = general_stop[0]
        else:
            timesize_start_index = 0
            general_stop_index = 0  # assumed fallback when no separator line is present
79 """ we split the structure:
82 * all others [igprof etc]
85 """ we get the indexes of spliting """
92 """ a list of missing fields """
    @staticmethod
    def isTimeStamp(line):
        """
        Returns whether the string is a timestamp (returns None if it is not)

        >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
        True
        >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")

        """
        datetime_format = "%a %b %d %H:%M:%S %Y"
        try:
            time.strptime(line, datetime_format)
            return True
        except ValueError:
            return None
    @staticmethod
    def findFirstIndex_ofStartsWith(job_lines, start_of_line):
        return [index
            for index, line in enumerate(job_lines)
            if line.startswith(start_of_line)][0]
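    # Illustrative behaviour: findFirstIndex_ofStartsWith(["a", "bc", "bd"], "b") -> 1;
    # raises IndexError when no line matches, which the callers below catch where needed.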
119 """ finds a line satisfying the `test_condition` comming before the `line_index` """
121 for line_index
in xrange(line_index -1, -1, -1):
122 line = lines[line_index]
124 if test_condition(line):
    def findLineAfter(self, line_index, lines, test_condition, return_index = False):
        """ finds a line satisfying the `test_condition` coming after the `line_index` """
        # we go forward through the lines list
        for index in xrange(line_index + 1, len(lines)):
            line = lines[index]
            if test_condition(line):
                if return_index:
                    return index
                return line
        raise ValueError("No line satisfying the condition after index %d" % line_index)
141 """ returns the first timestamp BEFORE the line with given index """
146 """ returns the first timestamp AFTER the line with given index """
    def handleParsingError(self, message):
        if self._DEBUG:
            raise ValueError, message
        print " ======== AN ERROR WHILE PARSING METADATA ===="
        print message
        print " =============== end ========================= "
160 """ reads the input cmsPerfsuite.log file """
161 def readInput(self, path, fileName = "cmsPerfSuite.log"):
163 f = open(os.path.join(path, fileName),
"r")
164 lines = [s.strip() for s
in f.readlines()]
176 """ Returns the cpu and memory info """
182 * num_cores = max(core id+1) [it's counted from 0]
183 * 'model name' is processor type [we will return only the first one - we assume others to be same!!??
184 * cpu MHz - is the speed of CPU
189 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
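        # Illustrative result for such a /proc/cpuinfo (hypothetical values):
        #   {"num_cores": 2, "cpu_speed_MHZ": "1862.000", "cpu_cache_size": "6144 KB",
        #    "cpu_model_name": "Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz"}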
        cpu_result = {}
        try:
            f = open(os.path.join(self._path, "cpuinfo"), "r")
            # we split the data into a list of (attr_name, attr_value) pairs
            cpu_attributes = [l.strip().split(":") for l in f.readlines()]
            f.close()
            cpu_result = {
                "num_cores": max([int(attr[1].strip()) + 1 for attr in cpu_attributes if attr[0].strip() == "processor"]),
                "cpu_speed_MHZ": max([attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cpu MHz"]),
                "cpu_cache_size": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cache size"][0],
                "cpu_model_name": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "model name"][0]
            }
        except IOError, e:
            print e
        """ memory info """
        mem_result = {}
        try:
            f = open(os.path.join(self._path, "meminfo"), "r")
            # again a list of (attr_name, attr_value) pairs
            mem_attributes = [l.strip().split(":") for l in f.readlines()]
            f.close()
            mem_result = {
                "memory_total_ram": [attr[1].strip() for attr in mem_attributes if attr[0].strip() == "MemTotal"][0]
            }
        except IOError, e:
            print e

        cpu_result.update(mem_result)
        return cpu_result
    def _applyParsingRules(self, parsing_rules, lines):
        """
        Applies the provided regular-expression rules (= rule[1] for rule in parsing_rules)
        to each line; if a rule matches a line, it puts the matched information into the
        dictionary under the specified keys (= rule[0]), which is later returned.
        rule[2], if present, states whether the field is required: required fields that are
        not found are recorded in self.missing_fields.
        rules = [
            ( (field_name_1_to_match, field_name_2), regular expression, /optionally: is the field required? if so "req"/ )
        ]
        """
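        # An illustrative example (one of the general-info rules defined below):
        #   (("architecture",), r"^Current Architecture is (.+)$")
        # matching a line "Current Architecture is <arch>" would yield info = {"architecture": "<arch>"}.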
245 """ we call a shared parsing helper """
250 self.missing_fields.extend(missing_fields)
257 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the
258 regexp while the second one is the regexp itself """
            (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
            (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
            (("architecture",), r"""^Current Architecture is (.+)$"""),
            (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
            (("base_release_path",), r"""^Base Release in: (.+)$"""),
            (("test_release_local_path",), r"""^Your Test release in: (.+)$"""),
            (("castor_dir",), r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),

            (("TimeSize_events",), r"""^(\d+) TimeSize events$"""),
            (("IgProf_events",), r"""^(\d+) IgProf events$"""),
            (("CallGrind_events",), r"""^(\d+) Callgrind events$"""),
            (("Memcheck_events",), r"""^(\d+) Memcheck events$"""),

            (("candles_TimeSize",), r"""^TimeSizeCandles \[(.*)\]$"""),
            (("candles_TimeSizePU",), r"""^TimeSizePUCandles \[(.*)\]$"""),
            (("candles_Memcheck",), r"""^MemcheckCandles \[(.*)\]$"""),
            (("candles_MemcheckPU",), r"""^MemcheckPUCandles \[(.*)\]$"""),
            (("candles_Callgrind",), r"""^CallgrindCandles \[(.*)\]$"""),
            (("candles_CallgrindPU",), r"""^CallgrindPUCandles \[(.*)\]$"""),
            (("candles_IgProfPU",), r"""^IgProfPUCandles \[(.*)\]$"""),
            (("candles_IgProf",), r"""^IgProfCandles \[(.*)\]$"""),

            (("cmsScimark_before",), r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
            (("cmsScimark_after",), r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
            (("cmsDriverOptions",), r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),

            (("HEPSPEC06_SCORE",), r"""^This machine's HEPSPEC06 score is: (.+)$"""),
        )
296 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
300 """ postprocess the candles list """
302 for field, value
in info.items():
303 if field.startswith(
"candles_"):
304 test = field.replace(
"candles_",
"")
305 value = [v.strip(
" '")
for v
in value.split(
",")]
310 info[
"candles"] = self._LINE_SEPARATOR.join([k+
":"+
",".
join(v)
for (k, v)
in candles.items()])
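        # Illustrative transformation (hypothetical candle names):
        #   info["candles_TimeSize"] == "'MinBias', 'TTbar'"
        #   -> info["candles"] == "TimeSize:MinBias,TTbar" (entries joined with self._LINE_SEPARATOR)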
        """ TAGS, e.g.:
        --- Tag ---    --- RelTag ---    -------- Package --------
        HEAD           V05-03-06         IgTools/IgProf
        V01-06-05      V01-06-04         Validation/Performance
        ---------------------------------------
        total packages: 2 (2 displayed)
        """
        tags_start_index = -1  # default
        try:
            tags_start_index = [i for i in xrange(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
        except IndexError:
            pass
        if tags_start_index > -1:
            tags_end_index = [i for i in xrange(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
            # note: +2 includes the separator and the "total packages" line in the stored tags
            tags = lines[tags_start_index:tags_end_index + 2]
        else:
            tags = []  # no tags found
334 """ we join the tags with separator to store as simple string """
335 info[
"tags"] = self._LINE_SEPARATOR.join(tags)
339 """ get the command line """
342 info[
"command_line"] = lines[cmd_index]
343 except IndexError, e:
346 info[
"command_line"] =
""
        try:
            # the boundaries of the parsed-arguments block (marker strings assumed)
            cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
            cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "-----------------------------")
            info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
        except IndexError, e:
            if self._DEBUG:
                print e
            info["command_line_parsed"] = ""

        return info
    def parseAllOtherTests(self):
        threads = {}
        tests = {}

        lines = self.lines_other
        """
        for each of the IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have the following structure of input file:
        * beginning ->> and the start timestamp - the first one:
            Adding thread <simpleGenReportThread(Thread-1, started)> to the list of active threads
            Launching the Memcheck tests on cpu 3 with 5 events each
            Fri Aug 14 01:16:03 2009

            <... whatever might be here, might overlap with other test start/end messages ...>

            Fri Aug 14 02:13:18 2009
            Memcheck test, in thread <simpleGenReportThread(Thread-1, stopped)> is done running on core 3
        * ending - the last timestamp before the "... is done running ..." line
        """
        # the start of a test
        reStart = re.compile(r"""^Launching the (.*) tests on cpu (\d+) with (\d+) events each$""")
        # the end (result) of a test
        reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped\)> is done running on core (\d+)$""")
        # the thread id of a test
        reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started\)> to the list of active threads$""")
        # the exit code of an individual cmsRelvalreport.py run
        reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
395 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
396 for line_index
in xrange(0, len(lines)):
397 line = lines[line_index]
400 if reStart.match(line):
402 testName, testCore, testEventsNum = reStart.match(line).groups()
407 line_thread = self.
findLineBefore(line_index, lines, test_condition=
lambda l: reAddThread.match(l))
408 (thread_id, ) = reAddThread.match(line_thread).groups()
411 if not threads.has_key(thread_id):
412 threads[thread_id] = {}
414 threads[thread_id].
update({
"name": testName,
"events_num": testEventsNum,
"core": testCore,
"start": time,
"thread_id": thread_id})
            # * or the end of a test
            if reEnd.match(line):
                testName, thread_id, testCore = reEnd.match(line).groups()
                if not threads.has_key(thread_id):
                    threads[thread_id] = {}
                # the end time is the last timestamp before the "... is done running ..." line
                time = self.firstTimeStampBefore(line_index, lines)
                exit_code = ""
                try:
                    # we search backwards for the exit code of this test
                    line_exitcode = self.findLineBefore(line_index, lines, test_condition = lambda l: reExitCode.match(l))
                    exit_code, = reExitCode.match(line_exitcode).groups()
                except Exception, e:
                    print "Error while getting exit code (Other test): %s" % str(e)

                threads[thread_id].update({
                    "end": time,
                    "exit_code": exit_code})
        for key, thread in threads.items():
            tests[thread["name"]] = thread
        return tests
441 """ parses the timeSize """
447 the structure of input file:
448 * beginning ->> and start timestamp- the firstone:
449 >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
451 Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
452 Candle MinBias will be PROCESSED
453 You defined your own steps to run:
456 Written out cmsRelvalreport.py input file at:
457 /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
458 Thu Aug 13 14:53:37 2009 [start]
460 Thu Aug 13 16:04:48 2009 [end]
461 Individual cmsRelvalreport.py ExitCode 0
462 * ending - the last timestamp "... ExitCode ...."
466 """ divide into separate jobs """
470 timesize_start_indicator = re.compile(
r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
471 for line_index
in xrange(0, len(lines)):
472 line = lines[line_index]
474 if timesize_start_indicator.match(line):
476 jobs.append(lines[start:line_index])
479 jobs.append(lines[start:len(lines)])
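        # Illustrative split: each "taskset -c <core> cmsRelvalreportInput.py ..." line starts a
        # new job, so each job holds the lines from its own marker up to (excluding) the next one;
        # the final append keeps the last job, which has no following marker.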
        parsing_rules = (
            (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
            (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
            (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
            # the pileup type is post-processed later on
            (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
            (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        )
        reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")
500 print "TimeSize (%d) jobs: %s" % (len(jobs), str(jobs))
502 for job_lines
in jobs:
503 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
            # fix the compatibility with the new cmsDriver.py --conditions option
            # (now we have autoconditions and FrontierConditions_GlobalTag is optional):
            if 'auto:' in info['conditions']:
                from Configuration.PyReleaseValidation.autoCond import autoCond
                info['conditions'] = autoCond[info['conditions'].split(':')[1]].split("::")[0]
            else:
                if 'FrontierConditions_GlobalTag' in info['conditions']:
                    info['conditions'] = info['conditions'].split(",")[1]
516 """ the following is not available on one of the releases, instead
517 use the first timestamp available on our job - that's the starting time :) """
527 end_time_before = self.
findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index =
True)
530 nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()
533 info[
"exit_code"] = exit_code
            # the steps are listed right after this marker line (end marker assumed)
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
            steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError("Steps were not found correctly: %s for current job: %s" % (str(steps), str(job_lines)))

                """ quite nasty - just a workaround """
                print "Trying to recover from this error in case of old cmssw"

                """ we assume that the steps are between the following sentence and a timestamp (start marker assumed) """
                steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
                steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)
                steps = job_lines[steps_start + 1:steps_end]
                if not self.validateSteps(steps):
                    self.handleParsingError("EVEN AFTER RECOVERY the steps were not found correctly! %s for current job: %s" % (str(steps), str(job_lines)))
                else:
                    print "RECOVERY SEEMS to be successful: %s" % str(steps)

            info["steps"] = self._LINE_SEPARATOR.join(steps)
            timesize_result.append(info)
        return {"TimeSize": timesize_result}
    def readCmsScimarkTest(self, testName, testType, core):
        lines = self.readInput(self._path, fileName = testName + ".log")
        scores = [{"score": self.reCmsScimarkTest.match(line).groups()[1],
                "type": testType,
                "core": core}
            for line in lines
            if self.reCmsScimarkTest.match(line)]

        # number the measurements
        i = 0
        for score in scores:
            i += 1
            score.update({"messurement_number": i})
        return scores
    def readCmsScimark(self, main_cores = [1]):
        main_core = main_cores[0]
        csimark = []
        csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
        csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))

        # we do not always know the number of cores used, so we search the directory
        # for the logs of the cmsScimark runs on the not-used cores
        reIsCsiMark_notusedcore = re.compile("^cmsScimark_(\d+).log$")
        scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
            for f in os.listdir(self._path)
            if reIsCsiMark_notusedcore.match(f)
                and os.path.isfile(os.path.join(self._path, f))]

        for core_number in scimark_files:
            try:
                csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" % str(core_number), core = core_number))
            except IOError, e:
                if self._DEBUG:
                    print e
        return csimark
    def getIgSummary(self):
        igresult = []
        globbed = glob.glob(os.path.join(self._path, "../*/IgProfData/*/*/*.sql3"))

        for f in globbed:
            profileInfo = self.getSummaryInfo(f)
            if not profileInfo:
                continue
            cumCounts, cumCalls = profileInfo
            dump, architecture, release, rest = f.rsplit("/", 3)
            candle, sequence, pileup, conditions, process, counterType, events = rest.split("___")
            events = events.replace(".sql3", "")
            igresult.append({"counter_type": counterType, "event": events, "cumcounts": cumCounts, "cumcalls": cumCalls})

        return igresult
    def getSummaryInfo(self, database):
        summary_query = """SELECT counter, total_count, total_freq, tick_period
                           FROM summary;"""
        error, output = self.doQuery(summary_query, database)
        if error or not output or output.count("\n") > 1:
            return None
        counter, total_count, total_freq, tick_period = output.split("@@@")
        if counter == "PERF_TICKS":
            return float(tick_period) * float(total_count), int(total_freq)
        else:
            return int(total_count), int(total_freq)
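    # Illustrative output handling (hypothetical row): "MEM_TOTAL@@@123@@@45@@@0.01" -> (123, 45),
    # while a PERF_TICKS row is converted with its tick period, i.e. tick_period * total_count.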
    def doQuery(self, query, database):
        if os.path.exists("/usr/bin/sqlite3"):
            sqlite = "/usr/bin/sqlite3"
        else:
            sqlite = "/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
        return getstatusoutput("echo '%s' | %s -separator @@@ %s" % (query, sqlite, database))
    def parseTheCompletion(self):
        """
        checks whether the suite has successfully finished
        and whether the tarball was successfully archived and uploaded to CASTOR """

        parsing_rules = (
            (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
            (("castor_md5",), r"""^The md5 checksum of the tarball: (.+)$"""),
            (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
            (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
            (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
        )
653 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
656 """ did we detect any errors in log files ? """
657 info[
"no_errors_detected"] = [line
for line
in self.
lines_other if line ==
"There were no errors detected in any of the log files!"]
and "1" or "0"
658 if not info[
"successfully_archived_tarball"]:
659 info[
"castor_file_url"] =
""
        if not info["castor_file_url"]:
            self.handleParsingError("Castor tarball URL not found. Trying to get it from the environment")
            lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")

            url = ""
            try:
                url = self.get_tarball_fromlog()
                print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s" % url
            except:
                if os.environ.has_key("PERFDB_CASTOR_FILE_URL"):
                    url = os.environ["PERFDB_CASTOR_FILE_URL"]
                else:
                    print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"

            while True:
                if lmdb_castor_url_is_valid(url):
                    info["castor_file_url"] = url
                    break
                print "Please enter a valid CASTOR url: it has to start with /castor/ and should point to the tarball"
                url = sys.stdin.readline().strip()

        return info
    def get_tarball_fromlog(self):
        '''Returns the tarball CASTOR location by parsing the cmsPerfSuite.log file'''
        print "Getting the url from the cmsPerfSuite.log"
        log = open("cmsPerfSuite.log", "r")
        castor_dir = "UNKNOWN_CASTOR_DIR"
        tarball = "UNKNOWN_TARBALL"
        for line in log.readlines():
            if 'castordir' in line:
                castor_dir = line.split()[1]
            if 'tgz' in line and tarball == "UNKNOWN_TARBALL":  # we pick the first tgz entry in the log
                tarball = os.path.basename(line.split()[2])
        log.close()
        castor_tarball = os.path.join(castor_dir, tarball)
        return castor_tarball
    def parseAll(self):
        result = {"General": {}, "TestResults": {}, "cmsSciMark": {}, "IgSummary": {}, 'unrecognized_jobs': []}

        """ all the general info - start, arguments, host etc. """
        result["General"].update(self.parseGeneralInfo())

        """ machine info - cpu, memory """
        result["General"].update(self.getMachineInfo())

        """ we add info on how successful the run was, when it finished, and the final castor url! """
        result["General"].update(self.parseTheCompletion())
720 print "BAD BAD BAD UNHANDLED ERROR" + str(e)
        main_cores = [result["General"]["run_on_cpus"]]
        num_cores = result["General"].get("num_cores", 0)

        result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)
        result["IgSummary"] = self.getIgSummary()

        if self.missing_fields:
            self.handleParsingError("Some required fields were not found during parsing: " + str(self.missing_fields))

        return result
if __name__ == "__main__":
    from xml.dom import minidom
    import cmssw_exportdb_xml

    path = os.path.abspath(".")
    p = parserPerfsuiteMetadata(path)
    run_info = p.parseAll()

    xml_doc = minidom.Document()
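    # Presumably the parsed run_info is exported here via the cmssw_exportdb_xml module imported
    # above; the exact call is not part of this excerpt (assumed signature):
    # cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)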