00001 import re
00002 import os, sys
00003 import time
00004 import parsingRulesHelper
00005 import glob
00006 from commands import getstatusoutput
00007
00008 class parserPerfsuiteMetadata:
00009 """
00010 The whole parsing works as following. We split the file into 3 parts (we keep 3 variables of line lists:self.lines_general, self.lines_timesize, self.lines_other ):
00011
00012 * General info
00013 As most of the info are simple one line strings, we define some regular expressions defining and matching each of those lines. The regular expressions are associated with data which we can get from them. e.g. ^Suite started at (.+) on (.+) by user (.+)$ would match only the line defining the time suite started and on which machine. It's associated with tuple of field names for general info which will be filled in. in this way we get info = {'start_time': start-taken-from-regexp, 'host': host, 'user': user}. This is done by calling simple function _applyParsingRules which checks each lines with each if one passes another, if it does fills in the result dictionary with the result.
00014 Additionally we get the cpu and memory info from /proc/cpuinfo and /proc/meminfo
00015
00016 * TimeSize test
00017 We use much the same technique here as well, but first we divide the timesize lines by job (individual run of cmssw - per candle, and pileup/not). Then for each of the jobs we apply our parsing rules and also find the starting and ending times (i.e. we know that the start timestamp is somewhere after a certain line containing "Written out cmsRelvalreport.py input file at:")
00018
00019 * All other tests
00020 We find the statement that the test is being launched (containing the test name, core and num events). Above it we have the thread number, and below it the starting time.
00021 The ending time can ONLY be connected with the starting time by the Thread-ID. The problem is that the log file names the same test instance differently, e.g. <Launching "PILE UP Memcheck"> at start but <"Memcheck" stopped> at the end.
00022 """
00023 _LINE_SEPARATOR = "|"
00024 def validateSteps(self, steps):
00025 """ Simple function for error detection. TODO: we could use a list of possible steps also """
00026 return not (not steps or len(steps) > self._MAX_STEPS)
00027
00028 def __init__(self, path):
00029
00030 self._MAX_STEPS = 5
00031 self._DEBUG = False
00032
00033
00034 self._path = path
00035
00036 """ some initialisation to speedup the other functions """
00037
00038 self.reCmsScimarkTest = re.compile(r"""^Composite Score:(\s*)([^\s]+)$""")
00039
00040
00041 """ the separator for beginning of timeSize / end of general statistics """
00042 self._timeSizeStart = re.compile(r"""^Launching the TimeSize tests \(TimingReport, TimeReport, SimpleMemoryCheck, EdmSize\) with (\d+) events each$""")
00043 """ (the first timestamp is the start of TimeSize) """
00044
00045
00046 """ the separator for end of timeSize / beginning of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests """
00047 self._timeSizeEnd = re.compile(r"""^Stopping all cmsScimark jobs now$""")
00048
00049
00050 self._otherStart = re.compile(r"^Preparing")
00051
00052 """
00053 ----- READ THE DATA -----
00054 """
00055 lines = self.readInput(path)
00056 """ split the whole file into parts """
00057
00058
00059
00060
00061
00062 timesize_end= [lines.index(line) for line in lines if self._timeSizeEnd.match(line)]
00063 if timesize_end:
00064 timesize_end_index = timesize_end[0]
00065 else:
00066 timesize_end_index=0
00067 timesize_start=[lines.index(line) for line in lines if self._timeSizeStart.match(line)]
00068 general_stop=[lines.index(line) for line in lines if self._otherStart.match(line)]
00069 if timesize_start:
00070 timesize_start_index = timesize_start[0]
00071 general_stop_index = timesize_start_index
00072 elif general_stop:
00073 timesize_start_index=timesize_end_index+1
00074 general_stop_index=general_stop[0]
00075 else:
00076 timesize_start_index=0
00077 general_stop_index=-1
00078
00079 """ we split the structure:
00080 * general
00081 * timesize
00082 * all others [igprof etc]
00083 """
00084
00085 """ we get the indexes of spliting """
00086
00087
00088 self.lines_general = lines[:general_stop_index]
00089 self.lines_timesize = lines[timesize_start_index:timesize_end_index+1]
00090 self.lines_other = lines[timesize_end_index:]
00091
00092 """ a list of missing fields """
00093 self.missing_fields = []
00094
00095 @staticmethod
00096 def isTimeStamp(line):
00097 """
00098 Returns whether the string is a timestamp (if not returns None)
00099
00100 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Aug 14 01:16:03 2009")
00101 True
00102 >>> parserPerfsuiteMetadata.isTimeStamp("Fri Augx 14 01:16:03 2009")
00103
00104 """
00105 datetime_format = "%a %b %d %H:%M:%S %Y"
00106 try:
00107 time.strptime(line, datetime_format)
00108 return True
00109 except ValueError:
00110 return None
00111
00112 @staticmethod
00113 def findFirstIndex_ofStartsWith(job_lines, start_of_line):
00114 return [job_lines.index(line)
00115 for line in job_lines
00116 if line.startswith(start_of_line)][0]
00117
00118 def findLineBefore(self, line_index, lines, test_condition):
00119 """ finds a line satisfying the `test_condition` comming before the `line_index` """
00120
00121 for line_index in xrange(line_index -1, -1, -1):
00122 line = lines[line_index]
00123
00124 if test_condition(line):
00125 return line
00126 raise ValueError
00127
00128
00129 def findLineAfter(self, line_index, lines, test_condition, return_index = False):
00130 """ finds a line satisfying the `test_condition` comming after the `line_index` """
00131
00132 for line_index in xrange(line_index + 1, len(lines)):
00133 line = lines[line_index]
00134
00135 if test_condition(line):
00136 if return_index:
00137 return line_index
00138 return line
00139
00140 def firstTimeStampBefore(self, line_index, lines):
00141 """ returns the first timestamp BEFORE the line with given index """
00142
00143 return self.findLineBefore(line_index, lines, test_condition = self.isTimeStamp)
00144
00145 def firstTimeStampAfter(self, line_index, lines):
00146 """ returns the first timestamp AFTER the line with given index """
00147
00148 return self.findLineAfter(line_index, lines, test_condition = self.isTimeStamp)
00149
00150 def handleParsingError(self, message):
00151 if self._DEBUG:
00152 raise ValueError, message
00153 print " ======== AND ERROR WHILE PARSING METADATA ===="
00154 print message
00155 print " =============== end ========================= "
00156
00157
00158
00159
00160 """ reads the input cmsPerfsuite.log file """
00161 def readInput(self, path, fileName = "cmsPerfSuite.log"):
00162 try:
00163 f = open(os.path.join(path, fileName), "r")
00164 lines = [s.strip() for s in f.readlines()]
00165 f.close()
00166 except IOError:
00167 lines = []
00168
00169
00170 return lines
00171
00172
00173
00174
00175 def getMachineInfo(self):
00176 """ Returns the cpu and memory info """
00177
00178 """ cpu info """
00179
00180 """
00181 we assume that:
00182 * num_cores = max(core id+1) [it's counted from 0]
00183 * 'model name' is processor type [we will return only the first one - we assume others to be same!!??
00184 * cpu MHz - is the speed of CPU
00185 """
00186
00187 """
00188 for
00189 model name : Intel(R) Core(TM)2 Duo CPU L9400 @ 1.86GHz
00190 cpu MHz : 800.000
00191 cache size : 6144 KB
00192 """
00193 cpu_result = {}
00194 try:
00195 f= open(os.path.join(self._path, "cpuinfo"), "r")
00196
00197
00198 cpu_attributes = [l.strip().split(":") for l in f.readlines()]
00199
00200 f.close()
00201 cpu_result = {
00202 "num_cores": max ([int(attr[1].strip())+1 for attr in cpu_attributes if attr[0].strip() == "processor"]),
00203 "cpu_speed_MHZ": max ([attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cpu MHz"]),
00204 "cpu_cache_size": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "cache size"][0],
00205 "cpu_model_name": [attr[1].strip() for attr in cpu_attributes if attr[0].strip() == "model name"][0]
00206 }
00207 except IOError,e:
00208 print e
00209
00210
00211
00212
00213
00214 """ memory info """
00215 mem_result = {}
00216
00217 try:
00218 f= open(os.path.join(self._path, "meminfo"), "r")
00219
00220
00221 mem_attributes = [l.strip().split(":") for l in f.readlines()]
00222
00223 mem_result = {
00224 "memory_total_ram": [attr[1].strip() for attr in mem_attributes if attr[0].strip() == "MemTotal"][0]
00225 }
00226
00227 except IOError,e:
00228 print e
00229
00230 cpu_result.update(mem_result)
00231 return cpu_result
00232
00233
00234
    def _applyParsingRules(self, parsing_rules, lines):
        """
        Applies the (provided) regular expression rules (=rule[1] for rule in parsing_rules)
        to each line and if it matches the line,
        puts the matched information to the dictionary as the specified keys (=rule[0]) which is later returned.
        The optional third element of a rule states whether the field is required to be found;
        if so and it isn't found, the field is recorded in self.missing_fields.
        rules = [
        ( (field_name_1_to_match, field_name_2), regular expression, /optionally: is the field required? if so "req"/ )
        ]
        """
        """ we call a shared parsing helper """
        # parsingRulesHelper compiles the regexps (compileRules=True) and
        # returns (parsed-field dict, list of required fields not found)
        (info, missing_fields) = parsingRulesHelper.rulesParser(parsing_rules, lines, compileRules = True)

        # accumulate the missing required fields across all parse* calls;
        # parseAll() reports them at the end
        self.missing_fields.extend(missing_fields)

        return info
00253
00254
00255 def parseGeneralInfo(self):
00256 lines = self.lines_general
00257 """ we define a simple list (tuple) of rules for parsing, the first part tuple defines the parameters to be fetched from the
00258 regexp while the second one is the regexp itself """
00259
00260 parsing_rules = (
00261 (("", "num_cores", "run_on_cpus"), r"""^This machine \((.+)\) is assumed to have (\d+) cores, and the suite will be run on cpu \[(.+)\]$"""),
00262 (("start_time", "host", "local_workdir", "user"), r"""^Performance Suite started running at (.+) on (.+) in directory (.+), run by user (.+)$""", "req"),
00263 (("architecture",) ,r"""^Current Architecture is (.+)$"""),
00264 (("test_release_based_on",), r"""^Test Release based on: (.+)$""", "req"),
00265 (("base_release_path",) , r"""^Base Release in: (.+)$"""),
00266 (("test_release_local_path",) , r"""^Your Test release in: (.+)$"""),
00267
00268 (("castor_dir",) , r"""^The performance suite results tarball will be stored in CASTOR at (.+)$"""),
00269
00270 (("TimeSize_events",) , r"""^(\d+) TimeSize events$"""),
00271 (("IgProf_events",) , r"""^(\d+) IgProf events$"""),
00272 (("CallGrind_events",) , r"""^(\d+) Callgrind events$"""),
00273 (("Memcheck_events",) , r"""^(\d+) Memcheck events$"""),
00274
00275 (("candles_TimeSize",) , r"""^TimeSizeCandles \[(.*)\]$"""),
00276 (("candles_TimeSizePU",) , r"""^TimeSizePUCandles \[(.*)\]$"""),
00277
00278 (("candles_Memcheck",) , r"""^MemcheckCandles \[(.*)\]$"""),
00279 (("candles_MemcheckPU",) , r"""^MemcheckPUCandles \[(.*)\]$"""),
00280
00281 (("candles_Callgrind",) , r"""^CallgrindCandles \[(.*)\]$"""),
00282 (("candles_CallgrindPU",) , r"""^CallgrindPUCandles \[(.*)\]$"""),
00283
00284 (("candles_IgProfPU",) , r"""^IgProfPUCandles \[(.*)\]$"""),
00285 (("candles_IgProf",) , r"""^IgProfCandles \[(.*)\]$"""),
00286
00287
00288 (("cmsScimark_before",) , r"""^(\d+) cmsScimark benchmarks before starting the tests$"""),
00289 (("cmsScimark_after",) , r"""^(\d+) cmsScimarkLarge benchmarks before starting the tests$"""),
00290 (("cmsDriverOptions",) , r"""^Running cmsDriver.py with user defined options: --cmsdriver="(.+)"$"""),
00291
00292 (("HEPSPEC06_SCORE",) ,r"""^This machine's HEPSPEC06 score is: (.+)$"""),
00293
00294
00295 )
00296 """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
00297 info = self._applyParsingRules(parsing_rules, lines)
00298
00299
00300 """ postprocess the candles list """
00301 candles = {}
00302 for field, value in info.items():
00303 if field.startswith("candles_"):
00304 test = field.replace("candles_", "")
00305 value = [v.strip(" '") for v in value.split(",")]
00306
00307 candles[test]=value
00308 del info[field]
00309
00310 info["candles"] = self._LINE_SEPARATOR.join([k+":"+",".join(v) for (k, v) in candles.items()])
00311
00312
00313 """ TAGS """
00314 """
00315 --- Tag --- --- RelTag --- -------- Package --------
00316 HEAD V05-03-06 IgTools/IgProf
00317 V01-06-05 V01-06-04 Validation/Performance
00318 ---------------------------------------
00319 total packages: 2 (2 displayed)
00320 """
00321 tags_start_index = -1
00322 try:
00323 tags_start_index = [i for i in xrange(0, len(lines)) if lines[i].startswith("--- Tag ---")][0]
00324 except:
00325 pass
00326 if tags_start_index > -1:
00327 tags_end_index = [i for i in xrange(tags_start_index + 1, len(lines)) if lines[i].startswith("---------------------------------------")][0]
00328
00329 tags = lines[tags_start_index:tags_end_index+2]
00330
00331
00332 else:
00333 tags = []
00334 """ we join the tags with separator to store as simple string """
00335 info["tags"] = self._LINE_SEPARATOR.join(tags)
00336
00337
00338
00339 """ get the command line """
00340 try:
00341 cmd_index = self.findFirstIndex_ofStartsWith(lines, "Performance suite invoked with command line:") + 1
00342 info["command_line"] = lines[cmd_index]
00343 except IndexError, e:
00344 if self._DEBUG:
00345 print e
00346 info["command_line"] = ""
00347
00348 try:
00349 cmd_parsed_start = self.findFirstIndex_ofStartsWith(lines, "Initial PerfSuite Arguments:") + 1
00350 cmd_parsed_end = self.findFirstIndex_ofStartsWith(lines, "Running cmsDriver.py")
00351 info["command_line_parsed"] = self._LINE_SEPARATOR.join(lines[cmd_parsed_start:cmd_parsed_end])
00352 except IndexError, e:
00353 if self._DEBUG:
00354 print e
00355 info["command_line"] = ""
00356
00357 return info
00358
00359
00360 def parseAllOtherTests(self):
00361
00362 test = {}
00363
00364 parsing_rules = (
00365 (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),
00366
00367 (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
00368 (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),
00369
00370 (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),
00371
00372 (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
00373
00374
00375 )
00376
00377
00378 lines = self.lines_other
00379 """
00380
00381 for each of IgProf_Perf, IgProf_Mem, Memcheck, Callgrind tests we have such a structure of input file:
00382 * beginning ->> and start timestamp- the firstone:
00383 Launching the PILE UP IgProf_Mem tests on cpu 4 with 201 events each
00384 Adding thread <simpleGenReportThread(Thread-1, started -176235632)> to the list of active threads
00385 Mon Jun 14 20:06:54 2010
00386
00387 <... whatever might be here, might overlap with other test start/end messages ..>
00388
00389 Mon Jun 14 21:59:33 2010
00390 IgProf_Mem test, in thread <simpleGenReportThread(Thread-1, stopped -176235632)> is done running on core 4
00391
00392 * ending - the last timestamp "before is done running ...."
00393 """
00394
00395
00396
00397
00398
00399 reSubmit = re.compile(r"""^Let's submit (.+) test on core (\d+)$""")
00400
00401 reStart = re.compile(r"""^Launching the (PILE UP |)(.*) tests on cpu (\d+) with (\d+) events each$""")
00402
00403
00404 reEnd = re.compile(r"""^(.*) test, in thread <simpleGenReportThread\((.+), stopped -(\d+)\)> is done running on core (\d+)$""")
00405
00406 reAddThread = re.compile(r"""^Adding thread <simpleGenReportThread\((.+), started -(\d+)\)> to the list of active threads$""")
00407
00408 reWaiting = re.compile(r"""^Waiting for tests to be done...$""")
00409
00410 reExitCode = re.compile(r"""Individual cmsRelvalreport.py ExitCode (\d+)""")
00411 """ we search for lines being either: (it's a little pascal'ish but we need the index!) """
00412
00413 jobs = []
00414
00415
00416 for line_index in xrange(0, len(lines)):
00417 line = lines[line_index]
00418 if reSubmit.match(line):
00419 end_index = self.findLineAfter(line_index, lines, test_condition=lambda l: reWaiting.match(l), return_index = True)
00420 jobs.append(lines[line_index:end_index])
00421
00422 for job_lines in jobs:
00423
00424 info = self._applyParsingRules(parsing_rules, job_lines)
00425
00426
00427 if 'auto:' in info['conditions']:
00428 from Configuration.AlCa.autoCond import autoCond
00429 info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
00430 else:
00431 if 'FrontierConditions_GlobalTag' in info['conditions']:
00432 info['conditions']=info['conditions'].split(",")[1]
00433
00434 steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
00435 steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")
00436
00437 steps = job_lines[steps_start + 1:steps_end]
00438 if not self.validateSteps(steps):
00439 self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))
00440
00441 """ quite nasty - just a work around """
00442 print "Trying to recover from this error in case of old cmssw"
00443
00444 """ we assume that steps are between the following sentance and a TimeStamp """
00445 steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
00446 steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)
00447
00448 steps = job_lines[steps_start + 1:steps_end]
00449 if not self.validateSteps(steps):
00450 self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
00451 else:
00452 print "RECOVERY SEEMS to be successful: %s" % str(steps)
00453
00454 info["steps"] = self._LINE_SEPARATOR.join(steps)
00455
00456 start_id_index = self.findLineAfter(0, job_lines, test_condition = reStart.match, return_index = True)
00457 pileUp, testName, testCore, testEventsNum = reStart.match(job_lines[start_id_index]).groups()
00458 info["testname"] = testName
00459
00460 thread_id_index = self.findLineAfter(0, job_lines, test_condition = reAddThread.match, return_index = True)
00461 info["start"] = self.firstTimeStampAfter(thread_id_index, job_lines)
00462
00463 thread_id, thread_number = reAddThread.match(job_lines[thread_id_index]).groups()
00464 info["thread_id"] = thread_id
00465
00466 if not test.has_key(testName):
00467 test[testName] = []
00468 test[testName].append(info)
00469
00470 for line_index in xrange(0, len(lines)):
00471 line = lines[line_index]
00472
00473 if reEnd.match(line):
00474 testName, thread_id, thread_num, testCore = reEnd.match(line).groups()
00475 time = self.firstTimeStampBefore(line_index, lines)
00476 try:
00477 exit_code = ""
00478
00479 line_exitcode = self.findLineBefore(line_index, lines, test_condition=lambda l: reExitCode.match(l))
00480 exit_code, = reExitCode.match(line_exitcode).groups()
00481 except Exception, e:
00482 print "Error while getting exit code (Other test): %s" + str(e)
00483
00484 for key, thread in test.items():
00485 for i in range(0, len(thread)):
00486 if thread[i]["thread_id"] == thread_id:
00487 thread[i].update({"end": time, "exit_code": exit_code})
00488 break
00489
00490 return test
00491
00492
    def parseTimeSize(self):
        """ parses the timeSize """
        # Returns {"TimeSize": [job_info_dict, ...]} built from self.lines_timesize.
        timesize_result = []

        """
        the structure of input file:
        * beginning ->> and start timestamp- the firstone:
                >>> [optional:For these tests will use user input file /build/RAWReference/MinBias_RAW_320_IDEAL.root]
                <...>
                Using user-specified cmsDriver.py options: --conditions FrontierConditions_GlobalTag,MC_31X_V4::All --eventcontent RECOSIM
                Candle MinBias will be PROCESSED
                You defined your own steps to run:
                RAW2DIGI-RECO
                *Candle MinBias
                Written out cmsRelvalreport.py input file at:
                /build/relval/CMSSW_3_2_4/workStep2/MinBias_TimeSize/SimulationCandles_CMSSW_3_2_4.txt
                Thu Aug 13 14:53:37 2009 [start]
                <....>
                Thu Aug 13 16:04:48 2009 [end]
                Individual cmsRelvalreport.py ExitCode 0
        * ending - the last timestamp "... ExitCode ...."
        """

        """ divide into separate jobs """
        # each job starts with a "taskset -c <core> cmsRelvalreportInput.py" line
        lines = self.lines_timesize
        jobs = []
        start = False
        timesize_start_indicator = re.compile(r"""^taskset -c (\d+) cmsRelvalreportInput.py""")
        # NOTE(review): `start = False` doubles as "no job seen yet" and as a
        # line index; a job starting exactly at index 0 would not be flushed
        # by the `if start:` test below (False == 0). Harmless for real logs
        # where other lines precede the first taskset line -- confirm.
        for line_index in xrange(0, len(lines)):
            line = lines[line_index]

            if timesize_start_indicator.match(line):
                if start:
                    jobs.append(lines[start:line_index])
                start = line_index
        # the last (or only) job extends to the end of the TimeSize section
        jobs.append(lines[start:len(lines)])

        # rules: first element = names of the regexp groups to store,
        # second = regexp, optional "req" marks the field as required
        parsing_rules = (
            (("", "candle", ), r"""^(Candle|ONLY) (.+) will be PROCESSED$""", "req"),

            (("cms_driver_options", ), r"""^Using user-specified cmsDriver.py options: (.+)$"""),
            (("", "conditions", ""), r"""^Using user-specified cmsDriver.py options: (.*)--conditions ([^\s]+)(.*)$""", "req"),

            (("", "pileup_type", ""), r"""^Using user-specified cmsDriver.py options:(.*)--pileup=([^\s]+)(.*)$"""),

            (("", "event_content", ""), r"""^Using user-specified cmsDriver.py options:(.*)--eventcontent ([^\s]+)(.*)$""", "req"),
        )

        reExit_code = re.compile(r"""Individual ([^\s]+) ExitCode (\d+)""")

        if self._DEBUG:
            print "TimeSize (%d) jobs: %s" % (len(jobs), str(jobs))

        for job_lines in jobs:
            """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
            info = self._applyParsingRules(parsing_rules, job_lines)

            # resolve "auto:" conditions through autoCond, otherwise strip
            # the "FrontierConditions_GlobalTag," prefix
            if 'auto:' in info['conditions']:
                from Configuration.AlCa.autoCond import autoCond
                info['conditions'] = autoCond[ info['conditions'].split(':')[1] ].split("::")[0]
            else:
                if 'FrontierConditions_GlobalTag' in info['conditions']:
                    info['conditions']=info['conditions'].split(",")[1]

            """ the following is not available on one of the releases, instead
            use the first timestamp available on our job - that's the starting time :) """
            info["start"] = self.firstTimeStampAfter(0, job_lines)

            # end time = last timestamp before the "... ExitCode ..." line
            end_time_before = self.findLineAfter(0, job_lines, test_condition = reExit_code.match, return_index = True)

            nothing, exit_code = reExit_code.match(job_lines[end_time_before]).groups()

            info["end"] = self.firstTimeStampBefore(end_time_before, job_lines)
            info["exit_code"] = exit_code

            # steps are listed between "You defined..." and the "*Candle" line
            steps_start = self.findFirstIndex_ofStartsWith(job_lines, "You defined your own steps to run:")
            steps_end = self.findFirstIndex_ofStartsWith(job_lines, "*Candle ")

            steps = job_lines[steps_start + 1:steps_end]
            if not self.validateSteps(steps):
                self.handleParsingError( "Steps were not found corrently: %s for current job: %s" % (str(steps), str(job_lines)))

                """ quite nasty - just a work around """
                print "Trying to recover from this error in case of old cmssw"

                """ we assume that steps are between the following sentance and a TimeStamp """
                steps_start = self.findFirstIndex_ofStartsWith(job_lines, "Steps passed to writeCommands")
                steps_end = self.findLineAfter(steps_start, job_lines, test_condition = self.isTimeStamp, return_index = True)

                steps = job_lines[steps_start + 1:steps_end]
                if not self.validateSteps(steps):
                    self.handleParsingError( "EVEN AFTER RECOVERY Steps were not found corrently! : %s for current job: %s" % (str(steps), str(job_lines)))
                else:
                    print "RECOVERY SEEMS to be successful: %s" % str(steps)

            info["steps"] = self._LINE_SEPARATOR.join(steps)

            timesize_result.append(info)
        return {"TimeSize": timesize_result}
00613
00614
00615
00616
00617 def readCmsScimarkTest(self, testName, testType, core):
00618 lines = self.readInput(self._path, fileName = testName + ".log")
00619 scores = [{"score": self.reCmsScimarkTest.match(line).groups()[1], "type": testType, "core": core}
00620 for line in lines
00621 if self.reCmsScimarkTest.match(line)]
00622
00623 i = 0
00624 for score in scores:
00625 i += 1
00626 score.update({"messurement_number": i})
00627 return scores
00628
00629 def readCmsScimark(self, main_cores = [1]):
00630 main_core = main_cores[0]
00631
00632
00633 csimark = []
00634 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2", testType = "mainCore", core = main_core))
00635 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark2_large", testType = "mainCore_Large", core = main_core))
00636
00637
00638
00639 reIsCsiMark_notusedcore = re.compile("^cmsScimark_(\d+).log$")
00640 scimark_files = [reIsCsiMark_notusedcore.match(f).groups()[0]
00641 for f in os.listdir(self._path)
00642 if reIsCsiMark_notusedcore.match(f)
00643 and os.path.isfile(os.path.join(self._path, f)) ]
00644
00645 for core_number in scimark_files:
00646 try:
00647 csimark.extend(self.readCmsScimarkTest(testName = "cmsScimark_%s" % str(core_number), testType = "NotUsedCore_%s" %str(core_number), core = core_number))
00648 except IOError, e:
00649 if self._DEBUG:
00650 print e
00651 return csimark
00652
00653
    def parseTheCompletion(self):
        """
        checks if the suite has successfully finished
        and if the tarball was successfully archived and uploaded to the castor """

        # rules: first element = names of the regexp groups to store,
        # second = regexp (all optional here - no "req" markers)
        parsing_rules = (
            (("finishing_time", "", ""), r"""^Performance Suite finished running at (.+) on (.+) in directory (.+)$"""),
            (("castor_md5",) , r"""^The md5 checksum of the tarball: (.+)$"""),
            (("successfully_archived_tarball", ), r"""^Successfully archived the tarball (.+) in CASTOR!$"""),

            (("castor_file_url",), r"""^The tarball can be found: (.+)$"""),
            (("castor_logfile_url",), r"""^The logfile can be found: (.+)$"""),
        )

        """ we apply the defined parsing rules to extract the required fields of information into the dictionary (as defined in parsing rules) """
        info = self._applyParsingRules(parsing_rules, self.lines_other)

        """ did we detect any errors in log files ? """
        # old-style `cond and "1" or "0"` idiom: "1" iff the success line is present
        info["no_errors_detected"] = [line for line in self.lines_other if line == "There were no errors detected in any of the log files!"] and "1" or "0"
        # a tarball URL is only trusted if the archiving succeeded
        if not info["successfully_archived_tarball"]:
            info["castor_file_url"] = ""

        if not info["castor_file_url"]:
            # fallbacks, in order: re-parse the log, environment variable,
            # interactive prompt (only when stdin is a tty)
            self.handleParsingError( "Castor tarball URL not found. Trying to get from environment")
            lmdb_castor_url_is_valid = lambda url: url.startswith("/castor/")

            url = ""
            try:
                url=self.get_tarball_fromlog()
                print "Extracted castor tarball full path by re-parsing cmsPerfSuite.log: %s"%url

            except:
                if os.environ.has_key("PERFDB_CASTOR_FILE_URL"):
                    url = os.environ["PERFDB_CASTOR_FILE_URL"]

                else:
                    print "Failed to get the tarball location from environment variable PERFDB_CASTOR_FILE_URL"
                    self.handleParsingError( "Castor tarball URL not found. Provide interactively")

            while True:
                # loop until a valid /castor/ URL is obtained; raises IOError
                # rather than blocking when stdin is not interactive
                if lmdb_castor_url_is_valid(url):
                    info["castor_file_url"] = url
                    break
                print "Please enter a valid CASTOR url: has to start with /castor/ and should point to the tarball"
                if os.isatty(0): url = sys.stdin.readline()
                else: raise IOError("stdin is closed.")

        return info
00707 def get_tarball_fromlog(self):
00708 '''Return the tarball castor location by parsing the cmsPerfSuite.log file'''
00709 print "Getting the url from the cmsPerfSuite.log"
00710 log=open("cmsPerfSuite.log","r")
00711 castor_dir="UNKNOWN_CASTOR_DIR"
00712 tarball="UNKNOWN_TARBALL"
00713 for line in log.readlines():
00714 if 'castordir' in line:
00715 castor_dir=line.split()[1]
00716 if 'tgz' in line and tarball=="UNKNOWN_TARBALL":
00717 if 'tar' in line:
00718 tarball=os.path.basename(line.split()[2])
00719 castor_tarball=os.path.join(castor_dir,tarball)
00720 return castor_tarball
00721
    def parseAll(self):
        """Top-level driver: runs every parse step and returns
        {"General": {...}, "TestResults": {...}, "cmsSciMark": [...],
        "unrecognized_jobs": []}."""
        result = {"General": {}, "TestResults":{}, "cmsSciMark":{}, 'unrecognized_jobs': []}

        """ all the general info - start, arguments, host etc """
        result["General"].update(self.parseGeneralInfo())

        """ machine info - cpu, memmory """
        result["General"].update(self.getMachineInfo())

        """ we add info about how successfull was the run, when it finished and final castor url to the file! """
        result["General"].update(self.parseTheCompletion())

        print "Parsing TimeSize runs..."
        if len(self.lines_timesize) > 0:
            try:
                result["TestResults"].update(self.parseTimeSize())
            except Exception, e:
                # broad catch: a single failing section must not abort the whole parse
                print "BAD BAD BAD UNHANDLED ERROR in parseTimeSize: " + str(e)

        print "Parsing Other(IgProf, Memcheck, ...) runs..."
        try:
            result["TestResults"].update(self.parseAllOtherTests())
        except Exception, e:
            print "BAD BAD BAD UNHANDLED ERROR in parseAllOtherTests: " + str(e)

        main_cores = [result["General"]["run_on_cpus"]]
        num_cores = result["General"].get("num_cores", 0)

        # NOTE(review): the two values computed above are discarded --
        # main_cores is unconditionally reset to [1] here and num_cores is
        # never used; confirm whether the run_on_cpus-based value was meant
        # to be passed to readCmsScimark instead.
        main_cores = [1]

        result["cmsSciMark"] = self.readCmsScimark(main_cores = main_cores)

        if self.missing_fields:
            self.handleParsingError("========== SOME REQUIRED FIELDS WERE NOT FOUND DURING PARSING ======= "+ str(self.missing_fields))

        return result
00764
00765
00766
if __name__ == "__main__":
    # Ad-hoc entry point: parse the perf-suite metadata found in the current
    # working directory, dump it as XML, then run the module doctests
    # (exercising e.g. parserPerfsuiteMetadata.isTimeStamp).
    from xml.dom import minidom
    import cmssw_exportdb_xml

    # the current directory is expected to contain cmsPerfSuite.log,
    # cpuinfo/meminfo copies and the cmsScimark*.log files
    path = os.path.abspath(".")

    p = parserPerfsuiteMetadata(path)
    run_info = p.parseAll()

    xml_doc = minidom.Document()
    # print_out = True: echo the generated XML to stdout as well
    cmssw_exportdb_xml.exportRunInfo(xml_doc, run_info, print_out = True)

    import doctest
    doctest.testmod()
00790
00791
00792
00793