00001 import re
00002 import os, sys
00003 import time
00004 import parsingRulesHelper
00005 import glob
00006 from commands import getstatusoutput
00007
00008 class parserPerfsuiteMetadata:
00009 """
00010 The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
00011
00012 * General info
00013 As most of the info are simple one line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with tuple of field names for general info which will be filled in. in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling simple function _applyParsingRules which checks each lines with each if one passes another, if it does fills in the result dictionary with the result.
Additionally we get the cpu and memory info from /proc/cpuinfo and /proc/meminfo
00015
00016 * TimeSize test
We use much the same technique here as well. But first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules, and we also find the starting and ending times (i.e. we know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:")
00018
00019 * All other tests
We find the statement that the test is being launched (containing the test name, core and number of events). Above it we have the thread number, and below it the starting time.
The ending time can ONLY be connected with the starting time by the Thread-ID. The problem is that the file names the same test instance differently, like <Launching "PILE UP Memcheck"> and <"Memcheck" stopped>.
00022 """
00023 _LINE_SEPARATOR = "|"
00024 def validateSteps(self, steps):
00025 """ Simple function for error detection. TODO: we could use a list of possible steps also """
00026 return not (not steps or len(steps) > self._MAX_STEPS)
00027
00028 def __init__(self, path):
00029
00030 self._MAX_STEPS = 5
00031 self._DEBUG = False
00032
00033
00034 self._path = path
00035
00036 """ some initialisation to speedup the other functions """
00037
00038 self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")
00039
00040
00041 """ the separator for beginning of timeSize / end of general statistics """
00042 self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
00043 """ (the first timestamp is the start of TimeSize) """
00044
00045
00046 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """
00047 self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")
00048
00049
00050 self._otherStart = re.compile(r"^Preparing")
00051
00052 """
00053 ----- READ THE DATA -----
00054 """
00055 lines = self.readInput(path)
00056 """ split the whole file into parts """
00057
00058
00059
00060
00061
00062 timesize_end= [lines.index(line) for line in lines if self._timeSizeEnd.match(line)]
00063 if timesize_end:
00064 timesize_end_index = timesize_end[0]
00065 else:
00066 timesize_end_index=0
00067 timesize_start=[lines.index(line) for line in lines if self._timeSizeStart.match(line)]
00068 general_stop=[lines.index(line) for line in lines if self._otherStart.match(line)]
00069 if timesize_start:
00070 timesize_start_index = timesize_start[0]
00071 general_stop_index=timesize_start_index
00072 elif general_stop:
00073 timesize_start_index=0
00074 general_stop_index=general_stop[0]
00075 else:
00076 timesize_start_index=0
00077 general_stop_index=-1
00078
00079 """ we split the structure:
00080 * general
00081 * timesize
00082 * all others [igprof etc]
00083 """
00084
00085 """ we get the indexes of spliting """
00086
00087
00088 self.lines_general = lines[:general_stop_index]
00089 self.lines_timesize = lines[timesize_start_index:timesize_end_index+1]
00090 self.lines_other = lines[timesize_end_index:]
00091
00092 """ a list of missing fields """
00093 self.missing_fields = []
00094
00095 @staticmethod
00096 def isTimeStamp(line):
00097 """
00098 Returns whether the string is a timestamp (if not returns None)
00099
00100 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
00101 True
00102 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")
00103
00104 """
00105 datetime_format = "%a %b %d %H:%M:%S %Y"
00106 try:
00107 time.strptime(line, datetime_format)
00108 return True
00109 except ValueError:
00110 return None
00111
00112 @staticmethod
00113 def findFirstIndex_ofStartsWith(job_lines, start_of_line):
00114 return [job_lines.index(line)
00115 for line in job_lines
00116 if line.startswith(start_of_line)][0]
00117
00118 def findLineBefore(self, line_index, lines, test_condition):
00119 """ finds a line satisfying the `test_condition` comming before the `line_index` """
00120
00121 for line_index in xrange(line_index -1, -1, -1):
00122 line = lines[line_index]
00123
00124 if test_condition(line):
00125 return line
00126 raise ValueError
00127
00128
00129 def findLineAfter(self, line_index, lines, test_condition, return_index = False):
00130 """ finds a line satisfying the `test_condition` comming after the `line_index` """
00131
00132 for line_index in xrange(line_index + 1, len(lines)):
00133 line = lines[line_index]
00134
00135 if test_condition(line):
00136 if return_index:
00137 return line_index
00138 return line
00139
00140 def firstTimeStampBefore(self, line_index, lines):
00141 """ returns the first timestamp BEFORE the line with given index """
00142
00143 return self.findLineBefore(line_index, lines, test_condition = self.isTimeStamp)
00144
00145 def firstTimeStampAfter(self, line_index, lines):
00146 """ returns the first timestamp AFTER the line with given index """
00147
00148 return self.findLineAfter(line_index, lines, test_condition = self.isTimeStamp)
00149
00150 def handleParsingError(self, message):
00151 if self._DEBUG:
00152 raise ValueError, message
00153 print " ======== AND ERROR WHILE PARSING METADATA ===="
00154 print message
00155 print " =============== end ========================= "
00156
00157
00158
00159
00160 """ reads the input cmsPerfsuite.log file """
00161 def readInput(self, path, fileName = "cmsPerfSuite.log"):
00162 try:
00163 f = open(os.path.join(path, fileName), "r")
00164 lines = [s.strip() for s in f.readlines()]
00165 f.close()
00166 except IOError:
00167 lines = []
00168
00169
00170 return lines
00171
00172
00173
00174
00175 def getMachineInfo(self):
00176 """ Returns the cpu and memory info """
00177
00178 """ cpu info """
00179
00180 """
00181 we assume that:
00182 * num_cores = max(core id+1) [it's counted from 0]
00183 * 'model name' is processor type [we will return only the first one - we assume others to be same!!??
00184 * cpu MHz - is the speed of CPU
00185 """
00186
00187 """
00188 for
00189 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
00190 cpu MHz : 800.000
00191 cache size : 6144 KB
00192 """
00193 cpu_result = {}
00194 try:
00195 f= open(os.path.join(self._path, "cpuinfo"), "r")
00196
00197
00198 cpu_attributes = [l.strip().split(":") for l in f.readlines()]
00199
00200 f.close()
00201 cpu_result = {
00202 "num_cores": max ([int(attr[1].strip())+1 for attr in cpu_attributes if attr[0].strip() == "processor"]),
00203 "cpu_speed_MHZ": max ([attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cpu MHz"]),
00204 "cpu_cache_size": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cache size"][0],
00205 "cpu_model_name": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "model name"][0]
00206 }
00207 except IOError,e:
00208 print e
00209
00210
00211
00212
00213
00214 """ memory info """
00215 mem_result = {}
00216
00217 try:
00218 f= open(os.path.join(self._path, "meminfo"), "r")
00219
00220
00221 mem_attributes = [l.strip().split(":") for l in f.readlines()]
00222
00223 mem_result = {
00224 "memory_total_ram": [attr[1].strip() for attr in mem_attributes if attr[0].strip() == "MemTotal"][0]
00225 }
00226
00227 except IOError,e:
00228 print e
00229
00230 cpu_result.update(mem_result)
00231 return cpu_result
00232
00233
00234
00235 def _applyParsingRules(self, parsing_rules, lines):
00236 """
00237 Applies the (provided) regular expression rules (=rule[1] for rule in parsing_rules)
00238 to each line and if it matches the line,
00239 puts the mached information to the dictionary as the specified keys (=rule[0]) which is later returned
00240 Rule[3] contains whether the field is required to be found. If so and it isn't found the exception would be raised.
00241 rules = [
00242 ( (field_name_1_to_match, field_name_2), regular expression, /optionaly: is the field required? if so "req"/ )
00243 ]
00244 """
00245 """ we call a shared parsing helper """
00246
00247
00248 (info, missing_fields) = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules = True)
00249
00250 self.missing_fields.extend(missing_fields)
00251
00252 return info
00253
00254
00255 def parseGeneralInfo(self):
00256 lines = self.lines_general
00257 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the
00258 regexp while the second one is the regexp itself """
00259
00260 parsing_rules = (
00261 (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
00262 (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
00263 (("architecture",) ,r"""^Current Architecture is (.+)$"""),
00264 (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
00265 (("base_release_path",) , r"""^Base Release in: (.+)$"""),
00266 (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),
00267
00268 (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),
00269
00270 (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
00271 (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
00272 (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
00273 (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),
00274
00275 (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
00276 (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),
00277
00278 (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
00279 (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),
00280
00281 (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
00282 (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),
00283
00284 (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
00285 (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),
00286
00287
00288 (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
00289 (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
00290 (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),
00291
00292 (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
00293
00294
00295 )
00296 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
00297 info = self._applyParsingRules(parsing_rules, lines)
00298
00299
00300 """ postprocess the candles list """
00301 candles = {}
00302 for field, value in info.items():
00303 if field.startswith("candles_"):
00304 test = field.replace("candles_", "")
00305 value = [v.strip(" '") for v in value.split(",")]
00306
00307 candles[test]=value
00308 del info[field]
00309
00310 info["candles"] = self._LINE_SEPARATOR.join([k+":"+",".join(v) for (k, v) in candles.items()])
00311
00312
00313 """ TAGS """
00314 """
00315 --- Tag --- --- RelTag --- -------- Package --------
00316 HEAD V05-03-06 IgTools/IgProf
00317 V01-06-05 V01-06-04 Validation/Performance
00318 ---------------------------------------
00319 total packages: 2 (2 displayed)
00320 """
00321 tags_start_index = -1
00322 try:
00323 tags_start_index = [i for i in xrange(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
00324 except:
00325 pass
00326 if tags_start_index > -1:
00327 tags_end_index = [i for i in xrange(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
00328
00329 tags = lines[tags_start_index:tags_end_index+2]
00330
00331
00332 else:
00333 tags = []
00334 """ we join the tags with separator to store as simple string """
00335 info["tags"] = self._LINE_SEPARATOR.join(tags)
00336
00337
00338
00339 """ get the command line """
00340 try:
00341 cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1
00342 info["command_line"] = lines[cmd_index]
00343 except IndexError, e:
00344 if self._DEBUG:
00345 print e
00346 info["command_line"] = ""
00347
00348 try:
00349 cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
00350 cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
00351 info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
00352 except IndexError, e:
00353 if self._DEBUG:
00354 print e
00355 info["command_line"] = ""
00356
00357 return info
00358
00359
00360 def parseAllOtherTests(self):
00361 threads = {}
00362 tests = {
00363
00364 }
00365
00366 lines = self.lines_other
00367 """
00368
00369 for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file:
00370 * beginning ->> and start timestamp- the firstone:
00371 Adding thread <simpleGenReportThread(Thread-1, started)> to the list of active threads
00372 Launching the Memcheck tests on cpu 3 with 5 events each
00373 Fri Aug 14 01:16:03 2009
00374
00375 <... whatever might be here, might overlap with other test start/end messages ..>
00376
00377 Fri Aug 14 02:13:18 2009
00378 Memcheck test, in thread <simpleGenReportThread(Thread-1, stopped)> is done running on core 3
00379 * ending - the last timestamp "before is done running ...."
00380 """
00381
00382
00383
00384
00385
00386
00387 reStart = re.compile(r"""^Launching the (.*) tests on cpu (\d+) with (\d+) events each$""")
00388
00389 reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped\)> is done running on core (\d+)$""")
00390
00391
00392 reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started\)> to the list of active threads$""")
00393
00394 reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
00395 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
00396 for line_index in xrange(0, len(lines)):
00397 line = lines[line_index]
00398
00399
00400 if reStart.match(line):
00401
00402 testName, testCore, testEventsNum = reStart.match(line).groups()
00403
00404 time = self.firstTimeStampAfter(line_index, lines)
00405
00406
00407 line_thread = self.findLineBefore(line_index, lines, test_condition=lambda l: reAddThread.match(l))
00408 (thread_id, ) = reAddThread.match(line_thread).groups()
00409
00410
00411 if not threads.has_key(thread_id):
00412 threads[thread_id] = {}
00413
00414 threads[thread_id].update({"name": testName, "events_num": testEventsNum, "core": testCore, "start": time, "thread_id": thread_id})
00415
00416
00417 if reEnd.match(line):
00418 testName, thread_id, testCore = reEnd.match(line).groups()
00419 if not threads.has_key(testName):
00420 threads[thread_id] = {}
00421
00422
00423 time = self.firstTimeStampBefore(line_index, lines)
00424 try:
00425 exit_code = ""
00426
00427 line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
00428 exit_code, = reExitCode.match(line_exitcode).groups()
00429 except Exception, e:
00430 print "Error while getting exit code (Other test): %s" + str(e)
00431
00432
00433
00434 threads[thread_id].update({"end": time, "exit_code":exit_code})
00435 for key, thread in threads.items():
00436 tests[thread["name"]] = thread
00437 return tests
00438
00439
00440 def parseTimeSize(self):
00441 """ parses the timeSize """
00442 timesize_result = []
00443
00444
00445
00446 """
00447 the structure of input file:
00448 * beginning ->> and start timestamp- the firstone:
00449 >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
00450 <...>
00451 Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
00452 Candle MinBias will be PROCESSED
00453 You defined your own steps to run:
00454 RAW2DIGI-RECO
00455 *Candle MinBias
00456 Written out cmsRelvalreport.py input file at:
00457 /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
00458 Thu Aug 13 14:53:37 2009 [start]
00459 <....>
00460 Thu Aug 13 16:04:48 2009 [end]
00461 Individual cmsRelvalreport.py ExitCode 0
00462 * ending - the last timestamp "... ExitCode ...."
00463 """
00464
00465
00466 """ divide into separate jobs """
00467 lines = self.lines_timesize
00468 jobs = []
00469 start = False
00470 timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
00471 for line_index in xrange(0, len(lines)):
00472 line = lines[line_index]
00473
00474 if timesize_start_indicator.match(line):
00475 if start:
00476 jobs.append(lines[start:line_index])
00477 start = line_index
00478
00479 jobs.append(lines[start:len(lines)])
00480
00481
00482 parsing_rules = (
00483 (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
00484
00485 (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
00486 (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
00487
00488 (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
00489
00490 (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
00491
00492
00493 )
00494
00495
00496
00497 reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")
00498
00499 if self._DEBUG:
00500 print "TimeSize (%d) jobs: %s" % (len(jobs), str(jobs))
00501
00502 for job_lines in jobs:
00503 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
00504 info = self._applyParsingRules(parsing_rules, job_lines)
00505
00506 if 'auto:' in info['conditions']:
00507 from Configuration.PyReleaseValidation.autoCond import autoCond
00508 info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
00509 else:
00510 if 'FrontierConditions_GlobalTag' in info['conditions']:
00511 info['conditions']=info['conditions'].split(",")[1]
00512
00513
00514
00515
00516 """ the following is not available on one of the releases, instead
00517 use the first timestamp available on our job - that's the starting time :) """
00518
00519
00520
00521 info["start"] = self.firstTimeStampAfter(0, job_lines)
00522
00523
00524
00525
00526
00527 end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)
00528
00529
00530 nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()
00531
00532 info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
00533 info["exit_code"] = exit_code
00534
00535 steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
00536 steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
00537
00538 steps = job_lines[steps_start + 1:steps_end]
00539 if not self.validateSteps(steps):
00540 self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))
00541
00542 """ quite nasty - just a work around """
00543 print "Trying to recover from this error in case of old cmssw"
00544
00545 """ we assume that steps are between the following sentance and a TimeStamp """
00546 steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
00547 steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)
00548
00549 steps = job_lines[steps_start + 1:steps_end]
00550 if not self.validateSteps(steps):
00551 self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
00552 else:
00553 print "RECOVERY SEEMS to be successful: %s" % str(steps)
00554
00555 info["steps"] = self._LINE_SEPARATOR.join(steps)
00556
00557
00558 timesize_result.append(info)
00559 return {"TimeSize": timesize_result}
00560
00561
00562
00563
00564 def readCmsScimarkTest(self, testName, testType, core):
00565 lines = self.readInput(self._path, fileName = testName + ".log")
00566 scores = [{"score": self.reCmsScimarkTest.match(line).groups()[1], "type": testType, "core": core}
00567 for line in lines
00568 if self.reCmsScimarkTest.match(line)]
00569
00570 i = 0
00571 for score in scores:
00572 i += 1
00573 score.update({"messurement_number": i})
00574 return scores
00575
00576 def readCmsScimark(self, main_cores = [1]):
00577 main_core = main_cores[0]
00578
00579
00580 csimark = []
00581 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
00582 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))
00583
00584
00585
00586 reIsCsiMark_notusedcore = re.compile("^cmsScimark_(\d+).log$")
00587 scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
00588 for f in os.listdir(self._path)
00589 if reIsCsiMark_notusedcore.match(f)
00590 and os.path.isfile(os.path.join(self._path, f)) ]
00591
00592 for core_number in scimark_files:
00593 try:
00594 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" %str(core_number), core = core_number))
00595 except IOError, e:
00596 if self._DEBUG:
00597 print e
00598 return csimark
00599
00600
00601
00602 def getIgSummary(self):
00603 igresult = []
00604 globbed = glob.glob(os.path.join(self._path, "../*/IgProfData/*/*/*.sql3"))
00605
00606 for f in globbed:
00607
00608 profileInfo = self.getSummaryInfo(f)
00609 if not profileInfo:
00610 continue
00611 cumCounts, cumCalls = profileInfo
00612 dump, architecture, release, rest = f.rsplit("/", 3)
00613 candle, sequence, pileup, conditions, process, counterType, events = rest.split("___")
00614 events = events.replace(".sql3", "")
00615 igresult.append({"counter_type": counterType, "event": events, "cumcounts": cumCounts, "cumcalls": cumCalls})
00616
00617 return igresult
00618
00619 def getSummaryInfo(self, database):
00620 summary_query="""SELECT counter, total_count, total_freq, tick_period
00621 FROM summary;"""
00622 error, output = self.doQuery(summary_query, database)
00623 if error or not output or output.count("\n") > 1:
00624 return None
00625 counter, total_count, total_freq, tick_period = output.split("@@@")
00626 if counter == "PERF_TICKS":
00627 return float(tick_period) * float(total_count), int(total_freq)
00628 else:
00629 return int(total_count), int(total_freq)
00630
    def doQuery(self, query, database):
        """ Runs `query` against the given sqlite `database` file by piping
            it through the sqlite command line tool.
            Returns the (exit_status, output) pair of commands.getstatusoutput;
            output columns are separated by "@@@". """
        # prefer the system sqlite3 binary, fall back to the shared AFS copy
        if os.path.exists("/usr/bin/sqlite3"):
            sqlite="/usr/bin/sqlite3"
        else:
            sqlite="/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
        # NOTE(review): query and database are interpolated into a shell
        # command unescaped - fine for our generated paths/queries, but do
        # not call this with untrusted input
        return getstatusoutput("echo '%s' | %s -separator @@@ %s" % (query, sqlite, database))
00637
    def parseTheCompletion(self):
        """
        checks if the suite has successfully finished
        and if the tarball was successfully archived and uploaded to the castor """

        parsing_rules = (
            (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
            (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
            (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),
            (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
            (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
        )

        # we apply the defined parsing rules to extract the required fields
        # of information into the dictionary (as defined in parsing rules)
        # NOTE(review): the direct key accesses below assume rulesParser
        # fills unmatched fields with a falsy default - verify in helper
        info = self._applyParsingRules(parsing_rules, self.lines_other)

        # did we detect any errors in log files ?
        # ("X and "1" or "0"" is the pre-2.5 conditional-expression idiom)
        info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
        # an unarchived tarball makes any reported URL meaningless
        if not info["successfully_archived_tarball"]:
            info["castor_file_url"] = ""

        if not info["castor_file_url"]:
            # the tarball URL was not in the log: try re-parsing the log
            # file, then the environment, finally ask interactively
            self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
            lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")

            url = ""
            try:
                # NOTE(review): leftover debug print?
                print "HERE!"
                url=self.get_tarball_fromlog()
                print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url

            except:
                if os.environ.has_key("PERFDB_CASTOR_FILE_URL"):
                    url = os.environ["PERFDB_CASTOR_FILE_URL"]

                else:
                    print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"
                    self.handleParsingError( "Castor tarball URL not found. Provide interactively")

            # keep prompting on stdin until a plausible CASTOR path is given
            while True:

                if lmdb_castor_url_is_valid(url):
                    info["castor_file_url"] = url
                    break
                print "Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball"
                url = sys.stdin.readline()


        return info
00690 def get_tarball_fromlog(self):
00691 '''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
00692 print "Getting the url from the cmsPerfSuite.log"
00693 log=open("cmsPerfSuite.log","r")
00694 castor_dir="UNKNOWN_CASTOR_DIR"
00695 tarball="UNKNOWN_TARBALL"
00696 for line in log.readlines():
00697 if 'castordir' in line:
00698 castor_dir=line.split()[1]
00699 if 'tgz' in line and tarball=="UNKNOWN_TARBALL":
00700 if 'tar' in line:
00701 tarball=os.path.basename(line.split()[2])
00702 castor_tarball=os.path.join(castor_dir,tarball)
00703 return castor_tarball
00704
00705 def parseAll(self):
00706 result = {"General": {}, "TestResults":{}, "cmsSciMark":{}, "IgSummary":{}, 'unrecognized_jobs': []}
00707
00708 """ all the general info - start, arguments, host etc """
00709 result["General"].update(self.parseGeneralInfo())
00710
00711 """ machine info - cpu, memmory """
00712 result["General"].update(self.getMachineInfo())
00713
00714 """ we add info about how successfull was the run, when it finished and final castor url to the file! """
00715 result["General"].update(self.parseTheCompletion())
00716
00717 try:
00718 result["TestResults"].update(self.parseTimeSize())
00719 except Exception, e:
00720 print "BAD BAD BAD UNHANDLED ERROR" + str(e)
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731 main_cores = [result["General"]["run_on_cpus"]]
00732 num_cores = result["General"].get("num_cores", 0)
00733
00734
00735
00736 main_cores = [1]
00737
00738
00739 result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)
00740 result["IgSummary"] = self.getIgSummary()
00741
00742
00743
00744 if self.missing_fields:
00745 self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= "+ str(self.missing_fields))
00746
00747 return result
00748
00749
00750
if __name__ == "__main__":
    # Standalone usage: parse the perfsuite output found in the current
    # working directory and export the collected run info as XML.
    from xml.dom import minidom
    import cmssw_exportdb_xml

    # the parser expects the directory holding cmsPerfSuite.log & friends
    path = os.path.abspath(".")

    p = parserPerfsuiteMetadata(path)
    run_info = p.parseAll()

    # serialize the parsed run info into an XML document (printed to stdout)
    xml_doc = minidom.Document()
    cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)

    # finally run the embedded doctests (e.g. the isTimeStamp examples)
    import doctest
    doctest.testmod()
00774
00775
00776
00777