00001
00002
00003 import sys, os, re
00004 import getopt
00005 from Validation.Performance.parserTimingReport import *
00006 from Validation.Performance.cmssw_exportdb_xml import *
00007 import Validation.Performance.cmssw_exportdb_xml as cmssw_exportdb_xml
00008 from Validation.Performance.parserPerfsuiteMetadata import parserPerfsuiteMetadata
00009
00010 from Validation.Performance.FileNamesHelper import *
00011 import Validation.Performance.parserEdmSize as parserEdmSize
00012
00013 import glob
00014 from commands import getstatusoutput
00015
""" indicates whether the CMSSW is [use False] available or not. on our testing machine it's not [use True] """
_TEST_RUN = False

""" global variables """
# File-name patterns used to pick the relevant report files out of a
# performance-suite run directory.
test_timing_report_log = re.compile("TimingReport.log$", re.IGNORECASE)
test_igprof_report_log = re.compile("^(.*)(IgProfMem|IgProfPerf)\.gz", re.IGNORECASE)
test_memcheck_report_log = re.compile("^(.*)memcheck_vlgd.xml", re.IGNORECASE)


# The XML document all results are exported into (written out in __main__).
xmldoc = minidom.Document()
# CMSSW release being harvested; set from the command line / environment in
# __main__, or recovered from the SimulationCandles file by the process_* fns.
release = None
# Accumulators filled while processing jobs; their keys end up in the name of
# the output xml file.
steps = {}
candles = {}
pileups = {}
00030
def usage(argv):
    """Build the usage/help text for this script.

    ``argv[0]`` is interpolated as the script name into every example line.
    """
    script = argv[0]
    return """
Usage: %(script)s [-v cmssw_version] [--version=cmssw_version]

if the cmssw version is in the system's environment (after running cmsenv):
$ %(script)s

otherwise one must specify the cmssw version:
$ %(script)s --version=CMSSW_3_2_0
$ %(script)s -v CMSSW_3_2_0

""" % {"script": script}
00044
def get_params(argv):
    """
    Returns the version of CMSSW to be used which it is taken from:
    * command line parameter or
    * environment variable
    in case of error returns None

    And also the directory to put the xml files to: if none --> returns ""
    """

    version = None
    xml_dir = "PerfSuiteDBData"

    # Parse the command line.  On a bad option, report it and fall through to
    # the environment-variable lookup: the original code referenced `opts`
    # even when getopt had raised, causing a NameError instead of a message.
    try:
        opts, args = getopt.getopt(argv[1:], "v:", ["version=", "outdir="])
    except getopt.GetoptError as e:
        print(e)
        opts = []
    for opt, arg in opts:
        if opt in ("-v", "--version"):
            version = arg
        if opt == "--outdir":
            xml_dir = arg

    """ if not get it from environment string """
    if not version:
        try:
            version = os.environ["CMSSW_VERSION"]
        except KeyError:
            pass

    return (version, xml_dir)
00081
def _eventContent_DEBUG(edm_report):
    """Debug helper: print, per event-content, the products assigned to it.

    Does nothing during test runs (`_TEST_RUN`), when the CMSSW helpers
    needed to resolve event contents are unavailable.
    """
    # caution: only for debugging
    EC_count = {}
    if not _TEST_RUN:
        # Group the products by the event contents they belong to.
        # (dict.has_key was removed in Python 3; setdefault covers both the
        # "first seen" and "already present" cases in one step.)
        for prod in edm_report:
            ecs = parseEventContent.List_ECs_forProduct(prod)
            for ec in ecs:
                EC_count.setdefault(ec, []).append(prod)

        for (ec, prods) in EC_count.items():
            print("==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods)))
            # list of products
            print("\n *".join(["%(cpp_type)s_%(module_name)s_%(module_label)s" % prod for prod in prods]))
00098
00099
def assign_event_content_for_product(product):
    """ returns modified product by adding the event content relationship """
    # Mutates `product` in place: attaches a comma-separated list of all the
    # event contents this product appears in.  Skipped in test runs, where
    # the CMSSW helper module `parseEventContent` is not importable.
    if not _TEST_RUN:
        product["event_content"] = ",".join(parseEventContent.List_ECs_forProduct(product))
    return product
00106
00107
def get_modules_sequences_relationships():
    # Asks CMSSW (ModuleToSequenceAssign, imported in __main__) for the
    # sequence -> modules mapping and flattens it for XML export into
    # [{"name": <sequence>, "modules": "<mod1>,<mod2>,..."}, ...].
    (sequenceWithModules, sequenceWithModulesString) =ModuleToSequenceAssign.assignModulesToSeqs()
    return [{"name": seq, "modules": ",".join(modules)} for (seq, modules) in sequenceWithModulesString.items()]
00111
00112
def exportIgProfReport(path, igProfReport, igProfType, runinfo):
    """Attach an IgProf job report to its matching TestResults entry.

    The entry in runinfo['TestResults'][igProfType] with the same candle,
    pile-up type, conditions and event content gets the report appended to
    its "jobs" list; if none matches, the report is stored under
    runinfo['unrecognized_jobs'].  Mutates `runinfo` (and the report's
    jobID "candle", upper-cased on a match) in place.
    """
    jobID = igProfReport["jobID"]
    # The directory name encodes the candle: strip IgProf/pile-up suffixes.
    candleLong = os.path.split(path)[1].replace("_IgProf_Perf", "").replace("_IgProf_Mem", "").replace("_PU", "")
    found = False

    # (dict.has_key was removed in Python 3; `in` works in both.)
    if igProfType in runinfo['TestResults']:
        for result in runinfo['TestResults'][igProfType]:
            if candleLong == result["candle"] and jobID["pileup_type"] == result['pileup_type'] and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']:
                # TestResults candles are stored upper-case; normalize ours.
                jobID["candle"] = jobID["candle"].upper()
                if "jobs" not in result:
                    result['jobs'] = []
                result['jobs'].append(igProfReport)
                found = True
                break

    if not found:
        print("============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== ")
        print("JOB ID: %s " % str(jobID))
        print(" ====================== ")
        runinfo['unrecognized_jobs'].append(igProfReport)
00134
00135
00136
def exportTimeSizeJob(path, timeSizeReport, runinfo):
    """Attach a TimeSize job report to its matching TestResults entry.

    The entry in runinfo['TestResults']['TimeSize'] with the same candle,
    pile-up type, conditions and event content gets the report appended to
    its "jobs" list; if none matches, the report is stored under
    runinfo['unrecognized_jobs'].  Mutates `runinfo` in place.
    """
    # The directory name encodes the candle: strip TimeSize/pile-up suffixes.
    candleLong = os.path.split(path)[1].replace("_TimeSize", "").replace("_PU", "")
    jobID = timeSizeReport["jobID"]

    found = False
    # (dict.has_key was removed in Python 3; `in` works in both.)
    if 'TimeSize' in runinfo['TestResults']:
        for result in runinfo['TestResults']['TimeSize']:
            """ If this is the testResult which fits TimeSize job """
            if result['candle'] == candleLong and jobID["pileup_type"] == result['pileup_type'] and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']:
                if "jobs" not in result:
                    result['jobs'] = []
                result['jobs'].append(timeSizeReport)
                found = True
                break

    if not found:
        print("============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== ")
        print("JOB ID: %s " % str(jobID))
        print(" ====================== ")
        runinfo['unrecognized_jobs'].append(timeSizeReport)
00162
00163
def exportMemcheckReport(path, MemcheckReport, runinfo):
    """Attach a Memcheck job report to its matching TestResults entry.

    The entry in runinfo['TestResults']['Memcheck'] with the same candle,
    pile-up type, conditions and event content gets the report appended to
    its "jobs" list; if none matches, the report is stored under
    runinfo['unrecognized_jobs'].  Mutates `runinfo` in place.
    """
    # The directory name encodes the candle: strip Memcheck/pile-up suffixes.
    candleLong = os.path.split(path)[1].replace("_Memcheck", "").replace("_PU", "")
    jobID = MemcheckReport["jobID"]

    found = False
    # (dict.has_key was removed in Python 3; `in` works in both.)
    if 'Memcheck' in runinfo['TestResults']:
        for result in runinfo['TestResults']['Memcheck']:
            """ If this is the testResult which fits Memcheck job """
            if result['candle'] == candleLong and jobID["pileup_type"] == result['pileup_type'] and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']:
                if "jobs" not in result:
                    result['jobs'] = []
                result['jobs'].append(MemcheckReport)
                found = True
                break

    if not found:
        print("============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== ")
        print("JOB ID: %s " % str(jobID))
        print(" ====================== ")
        runinfo['unrecognized_jobs'].append(MemcheckReport)
00190
def process_timesize_dir(path, runinfo):
    # Parses every TimingReport log in a *_TimeSize directory, pairs it with
    # the matching EdmSize report and root-file size, and registers the
    # combined TimeSize job report into `runinfo`.
    #
    # NOTE(review): event_content and conditions are module-level globals
    # assigned by the jobID tuple unpack below; __main__ reads their (last)
    # values to build the output xml file name -- fragile, last job wins.
    global release,event_content,conditions
    """ if the release is not provided explicitly we take it from the Simulation candles file """
    if (not release):
        release_fromlogfile = read_SimulationCandles(path)
        release = release_fromlogfile
        print "release from simulation candles: %s" % release

    if (not release):
        # the release could not be determined -- cannot harvest this dir
        raise Exception("the release was not found!")


    """ process the TimingReport log files """

    # select the *TimingReport.log files in this directory
    files = os.listdir(path)
    timing_report_files = [os.path.join(path, f) for f in files
                                 if test_timing_report_log.search(f)
                                    and os.path.isfile(os.path.join(path, f)) ]

    # process each TimingReport log file
    for timelog_f in timing_report_files:
        print "\nProcessing file: %s" % timelog_f
        print "------- "

        # jobID is first a tuple (candle, step, pileup_type, conditions,
        # event_content) parsed from the log file name ...
        jobID = getJobID_fromTimeReportLogName(os.path.join(path, timelog_f))
        print "jobID: %s" % str(jobID)
        (candle, step, pileup_type, conditions, event_content) = jobID
        # ... and is then converted into the dict form used by the exporters.
        jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
        print "Dictionary based jobID %s: " % str(jobID)

        # discard the job if any mandatory field (all but pileup_type) is empty
        discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
        if discard:
            print " ====================== The job HAS BEEN DISCARDED =============== "
            print " NOT ALL DATA WAS AVAILABLE "
            print " JOB ID = %s " % str(jobID)
            print " ======================= end ===================================== "
            continue

        # load the module-level and per-event timing plus memory-usage data
        (mod_timelog, evt_timelog, rss_data, vsize_data) =loadTimeLog(timelog_f)

        mod_timelog= processModuleTimeLogData(mod_timelog, groupBy = "module_name")
        print "Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog)

        # register the steps, candles and pile-up types seen in this run
        # (their keys end up in the output file name)
        steps[step] = 1
        candles[candle] = 1
        if pileup_type=="":
            pileups["NoPileUp"]=1
        else:
            pileups[pileup_type] = 1

        # the root file size for this candle/step (step separator ':' -> '=')
        root_file_size = getRootFileSize(path = path, candle = candle, step = step.replace(':', '='))

        # number of events from the SimulationCandles configuration
        num_events = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = pileup_type)["num_events"]

        # EdmSize report (False when unavailable); enrich each product with
        # its event-content assignment -- best effort, failures are printed
        edm_report = parserEdmSize.getEdmReport(path = path, candle = candle, step = step)
        if edm_report != False:
            try:
                # add event content data
                edm_report = map(assign_event_content_for_product, edm_report)
                # for testing / debugging
                _eventContent_DEBUG(edm_report)
            except Exception, e:
                print e

        # the complete per-job report handed over to the exporter
        timeSizeReport = {
            "jobID":jobID,
            "release": release,
            "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
            "metadata": {"testname": "TimeSize", "root_file_size": root_file_size, "num_events": num_events},
            "edmSize_result": edm_report
        }

        # export the report: attach it to the matching TestResults entry
        exportTimeSizeJob(path, timeSizeReport, runinfo)
00272
def process_memcheck_dir(path, runinfo):
    # Parses every memcheck_vlgd.xml file in a *_Memcheck directory and
    # registers one Memcheck job report (with the directory's total
    # valgrind error count) into `runinfo`.
    #
    # NOTE(review): event_content and conditions are module-level globals
    # assigned by the jobID tuple unpack below and read later by __main__.
    global release,event_content,conditions
    """ if the release is not provided explicitly we take it from the Simulation candles file """
    if (not release):
        release_fromlogfile = read_SimulationCandles(path)
        release = release_fromlogfile
        print "release from simulation candles: %s" % release

    if (not release):
        # the release could not be determined -- cannot harvest this dir
        raise Exception("the release was not found!")

    """ process the vlgd files """

    # select the *memcheck_vlgd.xml files in this directory
    files = os.listdir(path)
    memcheck_files = [os.path.join(path, f) for f in files
                                 if test_memcheck_report_log.search(f)
                                    and os.path.isfile(os.path.join(path, f)) ]

    if len(memcheck_files) == 0:
        print "No _vlgd files found!"
    else:
        # NOTE(review): the loop variable `file` shadows the Python 2 builtin
        for file in memcheck_files:
            # jobID: (candle, step, pileup_type, conditions, event_content)
            jobID = getJobID_fromMemcheckLogName(os.path.join(path, file))

            (candle, step, pileup_type, conditions, event_content) = jobID

            print "jobID: %s" % str(jobID)
            # converted into the dict form used by the exporters
            jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))

            print "Dictionary based jobID %s: " % str(jobID)

            # discard the job if any mandatory field (all but pileup_type) is empty
            discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
            if discard:
                print " ====================== The job HAS BEEN DISCARDED =============== "
                print " NOT ALL DATA WAS AVAILABLE "
                print " JOB ID = %s " % str(jobID)
                print " ======================= end ===================================== "
                continue

            # register the steps, candles and pile-up types seen in this run
            steps[step] = 1
            candles[candle.upper()] = 1
            if pileup_type=="":
                pileups["NoPileUp"]=1
            else:
                pileups[pileup_type] = 1

            # total number of valgrind <error> entries in this directory
            memerror = getMemcheckError(path)

            MemcheckReport = {
                "jobID": jobID,
                "release": release,
                "memcheck_errors": {"error_num": memerror},
                "metadata": {"testname": "Memcheck"},
            }

            # export the report: attach it to the matching TestResults entry
            exportMemcheckReport(path, MemcheckReport, runinfo)
00334
def getMemcheckError(path):
    """Count valgrind-reported errors in all memcheck xml files under `path`.

    Returns the total number of lines containing an "<error>" tag, summed
    over every *memcheck_vlgd.xml file in the directory.
    """
    globbed = glob.glob(os.path.join(path, "*memcheck_vlgd.xml"))

    errnum = 0

    for f in globbed:
        # Count matching lines in-process instead of shelling out to
        # `grep '<error>' ... | wc -l`: same result, but no dependency on
        # external tools and no breakage on paths containing spaces or
        # shell metacharacters.
        fh = open(f)
        try:
            for line in fh:
                if "<error>" in line:
                    errnum += 1
        finally:
            fh.close()

    return errnum
00347
00348
def process_igprof_dir(path, runinfo):
    # Parses every IgProf profile dump in a *_IgProf* directory, summarizes
    # the .sql3 dumps (via getIgSummary) and registers one IgProf job report
    # into `runinfo`.
    #
    # NOTE(review): event_content and conditions are module-level globals
    # assigned by the jobID tuple unpack below and read later by __main__.
    global release,event_content,conditions
    """ if the release is not provided explicitly we take it from the Simulation candles file """
    if (not release):
        release_fromlogfile = read_SimulationCandles(path)
        release = release_fromlogfile
        print "release from simulation candles: %s" % release

    if (not release):
        # the release could not be determined -- cannot harvest this dir
        raise Exception("the release was not found!")

    """ process the IgProf sql3 files """

    # select the IgProfMem/IgProfPerf .gz files in this directory
    files = os.listdir(path)
    igprof_files = [os.path.join(path, f) for f in files
                                 if test_igprof_report_log.search(f)
                                    and os.path.isfile(os.path.join(path, f)) ]

    if len(igprof_files) == 0:
        print "No igprof files found!"
    else:
        # NOTE(review): the loop variable `file` shadows the Python 2 builtin
        for file in igprof_files:
            # jobID: (candle, step, pileup_type, conditions, event_content)
            jobID = getJobID_fromIgProfLogName(file)

            (candle, step, pileup_type, conditions, event_content) = jobID

            print "jobID: %s" % str(jobID)
            # converted into the dict form used by the exporters
            jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))

            print "Dictionary based jobID %s: " % str(jobID)

            # the test name (e.g. IgProfMem/IgProfPerf) is encoded in the
            # last path component once the candle/pile-up prefixes are gone
            igProfType = path.split("/")[-1].replace("TTbar_", "").replace("MinBias_", "").replace("PU_", "")

            # discard the job if any mandatory field (all but pileup_type) is empty
            discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
            if discard:
                print " ====================== The job HAS BEEN DISCARDED =============== "
                print " NOT ALL DATA WAS AVAILABLE "
                print " JOB ID = %s " % str(jobID)
                print " ======================= end ===================================== "
                continue

            # register the steps, candles and pile-up types seen in this run
            steps[step] = 1
            candles[candle.upper()] = 1
            if pileup_type=="":
                pileups["NoPileUp"]=1
            else:
                pileups[pileup_type] = 1

            # cumulative counts/calls summary from all .sql3 dumps in the dir
            igs = getIgSummary(path)

            igProfReport = {
                "jobID": jobID,
                "release": release,
                "igprof_result": igs,
                "metadata": {"testname": igProfType},
            }

            # print igProfReport
            # export the report: attach it to the matching TestResults entry
            exportIgProfReport(path, igProfReport, igProfType, runinfo)
00414
00415
def getIgSummary(path):
    # Collects the summary (cumulative counts and calls) of every IgProf
    # .sql3 dump found directly under `path`, then converts the synthetic
    # "<last>_diff_<first>" entries into true differences between the two
    # matching absolute measurements.
    igresult = []
    globbed = glob.glob(os.path.join(path, "*.sql3"))

    for f in globbed:
        # getSummaryInfo returns (cumulative counts, cumulative calls) or
        # None when the dump cannot be summarized; skip the latter.
        profileInfo = getSummaryInfo(f)
        if not profileInfo:
            continue
        cumCounts, cumCalls = profileInfo
        # File name layout: .../<arch>/<release>/<candle>___<sequence>___
        # <pileup>___<conditions>___<process>___<counterType>___<events>.sql3
        # NOTE(review): `release` is a local here that shadows the module
        # global of the same name -- presumably intentional, but confirm.
        dump, architecture, release, rest = f.rsplit("/", 3)
        candle, sequence, pileup, conditions, process, counterType, events = rest.split("___")
        events = events.replace(".sql3", "")
        igresult.append({"counter_type": counterType, "event": events, "cumcounts": cumCounts, "cumcalls": cumCalls})

    # fix the diff counts: rewrite "<last>_diff_<first>" entries as the
    # difference of the two matching absolute entries
    for ig in igresult:
        if 'diff' in ig['event']:
            eventLast,eventOne = ig['event'].split('_diff_')
            # NOTE(review): if no entry matches eventOne/eventLast, the
            # cum*One/cum*Last locals below stay unbound and the subtraction
            # raises NameError (or silently reuses values from a previous
            # loop iteration) -- confirm inputs always contain both.
            for part in igresult:
                if part['counter_type'] == ig['counter_type'] and part['event'] == eventOne:
                    cumcountsOne = part['cumcounts']
                    cumcallsOne = part['cumcalls']
                if part['counter_type'] == ig['counter_type'] and part['event'] == eventLast:
                    cumcountsLast = part['cumcounts']
                    cumcallsLast = part['cumcalls']
            ig['cumcounts'] = cumcountsLast - cumcountsOne
            ig['cumcalls'] = cumcallsLast - cumcallsOne

    return igresult
00446
def getSummaryInfo(database):
    """Fetch (cumulative counts, cumulative calls) from an IgProf .sql3 dump.

    Returns None when the query fails or yields anything other than a single
    row.  For PERF_TICKS counters the raw tick count is converted to time by
    scaling with the sampling tick period.
    """
    summary_query="""SELECT counter, total_count, total_freq, tick_period
FROM summary;"""
    error, output = doQuery(summary_query, database)
    # Exactly one data row is expected; extra newlines mean several rows.
    if error or not output or output.count("\n") > 1:
        return None
    counter, total_count, total_freq, tick_period = output.split("@@@")
    if counter != "PERF_TICKS":
        # plain counter (e.g. memory): report the raw totals
        return int(total_count), int(total_freq)
    # timing profile: scale ticks by the tick period
    return float(tick_period) * float(total_count), int(total_freq)
00458
def doQuery(query, database):
    """Run `query` against the sqlite `database`; return (status, output).

    Output columns are separated by the literal "@@@" marker so callers can
    split them unambiguously.
    NOTE(review): the query and database path are interpolated into a shell
    command line -- acceptable for the internally generated file names used
    here, but not safe for untrusted input.
    """
    # Prefer the system sqlite3 binary; fall back to the AFS-hosted one.
    if os.path.exists("/usr/bin/sqlite3"):
        sqlite = "/usr/bin/sqlite3"
    else:
        sqlite = "/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
    command = "echo '%s' | %s -separator @@@ %s" % (query, sqlite, database)
    return getstatusoutput(command)
00465
00466
def searchTimeSizeFiles(runinfo):
    """ so far we will use the current dir to search in """
    # Finds every *_TimeSize directory under the current working directory
    # and hands each one to process_timesize_dir.
    path = os.getcwd()

    # single formatted string instead of the py2-only multi-argument print
    # statement (same output, also valid Python 3)
    print('full path = %s' % os.path.abspath(path))

    files = os.listdir(path)

    test_timeSizeDirs = re.compile("_TimeSize$", re.IGNORECASE)
    timesize_dirs = [os.path.join(path, f) for f in files if test_timeSizeDirs.search(f) and os.path.isdir(os.path.join(path, f))]

    for timesize_dir in timesize_dirs:
        # print timesize_dir
        process_timesize_dir(timesize_dir, runinfo)
00481
00482
def searchMemcheckFiles(runinfo):
    """ so far we will use the current dir to search in """
    # Finds every *_Memcheck* directory under the current working directory
    # and hands each one to process_memcheck_dir.
    path = os.getcwd()

    # single formatted string instead of the py2-only multi-argument print
    # statement (same output, also valid Python 3)
    print('full path = %s' % os.path.abspath(path))

    files = os.listdir(path)

    test_MemcheckDirs = re.compile("_Memcheck(.*)$", re.IGNORECASE)
    memcheck_dirs = [os.path.join(path, f) for f in files if test_MemcheckDirs.search(f) and os.path.isdir(os.path.join(path, f))]

    for memcheck_dir in memcheck_dirs:
        print(memcheck_dir)
        process_memcheck_dir(memcheck_dir, runinfo)
00497
00498
def searchIgProfFiles(runinfo):
    """ so far we will use the current dir to search in """
    # Finds every *_IgProf* directory under the current working directory
    # and hands each one to process_igprof_dir.
    path = os.getcwd()

    # single formatted string instead of the py2-only multi-argument print
    # statement (same output, also valid Python 3)
    print('full path = %s' % os.path.abspath(path))

    files = os.listdir(path)

    test_IgProfDirs = re.compile("_IgProf(.*)$", re.IGNORECASE)
    igprof_dirs = [os.path.join(path, f) for f in files if test_IgProfDirs.search(f) and os.path.isdir(os.path.join(path, f))]

    for igprof_dir in igprof_dirs:
        print(igprof_dir)
        process_igprof_dir(igprof_dir, runinfo)
00513
def exportSequences():
    """ Exports the sequences to XML Doc """
    # The CMSSW_VERSION env var is only used as an integrity check: the
    # sequence data is loaded from the currently set-up CMSSW, which should
    # match the release being harvested.
    try:
        env_cmssw_version = os.environ["CMSSW_VERSION"]
    except KeyError:
        # parenthesized print: same output, also valid Python 3
        print("<<<<< ====== Error: cannot get CMSSW version [just integrity check for sequences]. \
Is the CMSSW environment initialized? (use cmsenv) ==== >>>>")
        env_cmssw_version = None

    print(" ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" %(env_cmssw_version, release))
    # writes the sequence -> modules mapping into the module-level xmldoc
    xml_export_Sequences(xml_doc = xmldoc, sequences = get_modules_sequences_relationships(), release=release)
00525
00526
00527
if __name__ == "__main__":

    # Harvest an entire cmsPerfSuite run directory (the current working
    # directory) into a single xml file for DB import.

    # release and output dir from -v/--version/--outdir or CMSSW_VERSION env
    (release, output_dir) = get_params(sys.argv)

    if not release:
        """ print usage(sys.argv)
        sys.exit(2) """
        print "The version was not provided explicitly, will try to get one from SimulationCandles file """


    # Export the metadata from cmsPerfSuite.log (in current working directory!)
    print "Parsing cmsPerfSuite.log: getting all the metadata concerning the run"
    p = parserPerfsuiteMetadata(os.getcwd())
    run_info = p.parseAll()

    print "Loading Sequences and Event-Content(s). Please wait..."

    Sequences_OK = False
    EventContents_OK = False

    if not _TEST_RUN:
        # These imports require a working CMSSW environment, hence the
        # guards: failures just disable the corresponding export step.
        try:
            import Validation.Performance.ModuleToSequenceAssign as ModuleToSequenceAssign
            Sequences_OK = True
        except Exception, e:
            print e
        try:
            import Validation.Performance.parseEventContent as parseEventContent
            EventContents_OK = True
        except Exception, e:
            print e

    print "Parsing TimeSize report"
    # Search for TimeSize files: EdmSize, TimingReport
    searchTimeSizeFiles(run_info)
    print "Parsing IgProf report"
    # Search for IgProf files
    searchIgProfFiles(run_info)
    print "Parsing Memcheck report"
    # Search for Memcheck files
    searchMemcheckFiles(run_info)
    #print run_info

    print "Exporting sequences and event-content rules"
    if not _TEST_RUN:
        """ for testing on laptom we have no CMSSW """
        # export sequences (for currently loaded CMSSW)
        if Sequences_OK:
            exportSequences()

        if EventContents_OK:
            # export event's content rules
            eventContentRules = parseEventContent.getTxtEventContentRules()
            cmssw_exportdb_xml.exportECRules(xmldoc, eventContentRules)


    cmssw_exportdb_xml.exportRunInfo(xmldoc, run_info, release = release)
    #save the XML file, TODO: change fileName after introducting the JobID
    import datetime
    now = datetime.datetime.now()
    #Changing slightly the XML filename format
    #FIXME: review this convention and archive the xml in a separate CASTOR xml directory for quick recovery of DB...
    # NOTE(review): event_content and conditions are module globals set as a
    # side effect of the process_* functions (last processed job wins); if no
    # job was processed at all, this line raises NameError.
    file_name = "%s___%s___%s___%s___%s___%s___%s.xml" % (release, "_".join(steps.keys()), "_".join(candles.keys()), "_".join(pileups.keys()),event_content,conditions,now.isoformat())
    print "Writing the output to: %s " % file_name

    write_xml(xmldoc, output_dir, file_name) #change this function to be able to handle directories in remote machines (via tar pipes for now could always revert to rsync later).
    #NB write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py