8 from Validation.Performance.parserPerfsuiteMetadata
import parserPerfsuiteMetadata
11 import Validation.Performance.parserEdmSize
as parserEdmSize
14 from commands
import getstatusoutput
16 """ indicates whether the CMSSW is [use False] available or not. on our testing machine it's not [use True] """
19 """ global variables """
20 test_timing_report_log = re.compile(
"TimingReport.log$", re.IGNORECASE)
21 test_igprof_report_log = re.compile(
"^(.*)(IgProfMem|IgProfPerf)\.gz", re.IGNORECASE)
22 test_memcheck_report_log = re.compile(
"^(.*)memcheck_vlgd.xml", re.IGNORECASE)
25 xmldoc = minidom.Document()
34 Usage: %(script)s [-v cmssw_version] [--version=cmssw_version]
36 if the cmssw version is in the system's environment (after running cmsenv):
39 otherwise one must specify the cmssw version:
40 $ %(script)s --version=CMSSW_3_2_0
41 $ %(script)s -v CMSSW_3_2_0
47 Returns the version of CMSSW to be used which it is taken from:
48 * command line parameter or
49 * environment variable
50 in case of error returns None
52 And also the directory to put the xml files to: if none --> returns ""
55 """ try to get the version for command line argument """
62 xml_dir=
"PerfSuiteDBData"
64 opts, args = getopt.getopt(argv[1:],
"v:", [
"version=",
"outdir="])
65 except getopt.GetoptError, e:
68 if opt
in (
"-v",
"--version"):
73 """ if not get it from environment string """
76 version = os.environ[
"CMSSW_VERSION"]
80 return (version, xml_dir)
87 for prod
in edm_report:
90 if not EC_count.has_key(ec):
94 for (ec, prods)
in EC_count.items():
95 print "==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods))
97 print "\n *".
join([
"%(cpp_type)s_%(module_name)s_%(module_label)s" % prod
for prod
in prods])
101 """ returns modified product by adding the event content relationship """
110 return [{
"name": seq,
"modules":
",".
join(modules)}
for (seq, modules)
in sequenceWithModulesString.items()]
114 jobID = igProfReport[
"jobID"]
116 candleLong = os.path.split(path)[1].
replace(
"_IgProf_Perf",
"").
replace(
"_IgProf_Mem",
"").
replace(
"_PU",
"")
119 if runinfo[
'TestResults'].has_key(igProfType):
120 for result
in runinfo[
'TestResults'][igProfType]:
121 if candleLong == result[
"candle"]
and jobID[
"pileup_type"] == result[
'pileup_type']
and jobID[
"conditions"] == result[
'conditions']
and jobID[
"event_content"] == result[
'event_content']:
122 jobID[
"candle"] = jobID[
"candle"].
upper()
123 if not result.has_key(
"jobs"):
125 result[
'jobs'].
append(igProfReport)
130 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
131 print "JOB ID: %s " % str(jobID)
132 print " ====================== "
133 runinfo[
'unrecognized_jobs'].
append(igProfReport)
138 candleLong = os.path.split(path)[1].
replace(
"_TimeSize",
"").
replace(
"_PU",
"")
139 jobID = timeSizeReport[
"jobID"]
143 if runinfo[
'TestResults'].has_key(
'TimeSize'):
144 for result
in runinfo[
'TestResults'][
'TimeSize']:
146 """ If this is the testResult which fits TimeSize job """
149 if result[
'candle'] == candleLong
and jobID[
"pileup_type"] == result[
'pileup_type']
and jobID[
"conditions"] == result[
'conditions']
and jobID[
"event_content"] == result[
'event_content']:
151 if not result.has_key(
"jobs"):
153 result[
'jobs'].
append(timeSizeReport)
158 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
159 print "JOB ID: %s " % str(jobID)
160 print " ====================== "
161 runinfo[
'unrecognized_jobs'].
append(timeSizeReport)
165 candleLong = os.path.split(path)[1].
replace(
"_Memcheck",
"").
replace(
"_PU",
"")
166 jobID = MemcheckReport[
"jobID"]
170 if runinfo[
'TestResults'].has_key(
'Memcheck'):
171 for result
in runinfo[
'TestResults'][
'Memcheck']:
174 """ If this is the testResult which fits Memcheck job """
177 if result[
'candle'] == candleLong
and jobID[
"pileup_type"] == result[
'pileup_type']
and jobID[
"conditions"] == result[
'conditions']
and jobID[
"event_content"] == result[
'event_content']:
179 if not result.has_key(
"jobs"):
181 result[
'jobs'].
append(MemcheckReport)
186 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
187 print "JOB ID: %s " % str(jobID)
188 print " ====================== "
189 runinfo[
'unrecognized_jobs'].
append(MemcheckReport)
192 global release,event_content,conditions
193 """ if the release is not provided explicitly we take it from the Simulation candles file """
196 release = release_fromlogfile
197 print "release from simulation candles: %s" % release
201 raise Exception(
"the release was not found!")
204 """ process the TimingReport log files """
207 files = os.listdir(path)
208 timing_report_files = [os.path.join(path, f)
for f
in files
209 if test_timing_report_log.search(f)
210 and os.path.isfile(os.path.join(path, f)) ]
213 for timelog_f
in timing_report_files:
214 print "\nProcessing file: %s" % timelog_f
218 print "jobID: %s" % str(jobID)
219 (candle, step, pileup_type, conditions, event_content) = jobID
220 jobID =
dict(
zip((
"candle",
"step",
"pileup_type",
"conditions",
"event_content"), jobID))
221 print "Dictionary based jobID %s: " % str(jobID)
224 discard = len([key
for key, value
in jobID.items()
if key !=
"pileup_type" and not value])
226 print " ====================== The job HAS BEEN DISCARDED =============== "
227 print " NOT ALL DATA WAS AVAILABLE "
228 print " JOB ID = %s " % str(jobID)
229 print " ======================= end ===================================== "
233 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
236 print "Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog)
242 pileups[
"NoPileUp"]=1
244 pileups[pileup_type] = 1
247 root_file_size =
getRootFileSize(path = path, candle = candle, step = step.replace(
':',
'='))
253 if edm_report !=
False:
256 edm_report =
map(assign_event_content_for_product, edm_report)
265 "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
266 "metadata": {
"testname":
"TimeSize",
"root_file_size": root_file_size,
"num_events": num_events},
267 "edmSize_result": edm_report
274 global release,event_content,conditions
275 """ if the release is not provided explicitly we take it from the Simulation candles file """
278 release = release_fromlogfile
279 print "release from simulation candles: %s" % release
283 raise Exception(
"the release was not found!")
285 """ process the vlgd files """
288 files = os.listdir(path)
289 memcheck_files = [os.path.join(path, f)
for f
in files
290 if test_memcheck_report_log.search(f)
291 and os.path.isfile(os.path.join(path, f)) ]
293 if len(memcheck_files) == 0:
294 print "No _vlgd files found!"
296 for file
in memcheck_files:
299 (candle, step, pileup_type, conditions, event_content) = jobID
301 print "jobID: %s" % str(jobID)
302 jobID =
dict(
zip((
"candle",
"step",
"pileup_type",
"conditions",
"event_content"), jobID))
304 print "Dictionary based jobID %s: " % str(jobID)
307 discard = len([key
for key, value
in jobID.items()
if key !=
"pileup_type" and not value])
309 print " ====================== The job HAS BEEN DISCARDED =============== "
310 print " NOT ALL DATA WAS AVAILABLE "
311 print " JOB ID = %s " % str(jobID)
312 print " ======================= end ===================================== "
317 candles[candle.upper()] = 1
319 pileups[
"NoPileUp"]=1
321 pileups[pileup_type] = 1
328 "memcheck_errors": {
"error_num": memerror},
329 "metadata": {
"testname":
"Memcheck"},
336 globbed = glob.glob(os.path.join(path,
"*memcheck_vlgd.xml"))
342 cmd =
"grep '<error>' "+f+
" | wc -l "
343 p = os.popen(cmd,
'r')
344 errnum += int(p.readlines()[0])
350 global release,event_content,conditions
351 """ if the release is not provided explicitly we take it from the Simulation candles file """
354 release = release_fromlogfile
355 print "release from simulation candles: %s" % release
359 raise Exception(
"the release was not found!")
361 """ process the IgProf sql3 files """
364 files = os.listdir(path)
365 igprof_files = [os.path.join(path, f)
for f
in files
366 if test_igprof_report_log.search(f)
367 and os.path.isfile(os.path.join(path, f)) ]
369 if len(igprof_files) == 0:
370 print "No igprof files found!"
372 for file
in igprof_files:
375 (candle, step, pileup_type, conditions, event_content) = jobID
377 print "jobID: %s" % str(jobID)
378 jobID =
dict(
zip((
"candle",
"step",
"pileup_type",
"conditions",
"event_content"), jobID))
380 print "Dictionary based jobID %s: " % str(jobID)
385 discard = len([key
for key, value
in jobID.items()
if key !=
"pileup_type" and not value])
387 print " ====================== The job HAS BEEN DISCARDED =============== "
388 print " NOT ALL DATA WAS AVAILABLE "
389 print " JOB ID = %s " % str(jobID)
390 print " ======================= end ===================================== "
395 candles[candle.upper()] = 1
397 pileups[
"NoPileUp"]=1
399 pileups[pileup_type] = 1
407 "igprof_result": igs,
408 "metadata": {
"testname": igProfType},
418 globbed = glob.glob(os.path.join(path,
"*.sql3"))
425 cumCounts, cumCalls = profileInfo
426 dump, architecture, release, rest = f.rsplit(
"/", 3)
427 candle, sequence, pileup, conditions, process, counterType, events = rest.split(
"___")
428 events = events.replace(
".sql3",
"")
429 igresult.append({
"counter_type": counterType,
"event": events,
"cumcounts": cumCounts,
"cumcalls": cumCalls})
433 if 'diff' in ig[
'event']:
434 eventLast,eventOne = ig[
'event'].
split(
'_diff_')
435 for part
in igresult:
436 if part[
'counter_type'] == ig[
'counter_type']
and part[
'event'] == eventOne:
437 cumcountsOne = part[
'cumcounts']
438 cumcallsOne = part[
'cumcalls']
439 if part[
'counter_type'] == ig[
'counter_type']
and part[
'event'] == eventLast:
440 cumcountsLast = part[
'cumcounts']
441 cumcallsLast = part[
'cumcalls']
442 ig[
'cumcounts'] = cumcountsLast - cumcountsOne
443 ig[
'cumcalls'] = cumcallsLast - cumcallsOne
448 summary_query=
"""SELECT counter, total_count, total_freq, tick_period
450 error, output =
doQuery(summary_query, database)
451 if error
or not output
or output.count(
"\n") > 1:
453 counter, total_count, total_freq, tick_period = output.split(
"@@@")
454 if counter ==
"PERF_TICKS":
455 return float(tick_period) * float(total_count), int(total_freq)
457 return int(total_count), int(total_freq)
460 if os.path.exists(
"/usr/bin/sqlite3"):
461 sqlite=
"/usr/bin/sqlite3"
463 sqlite=
"/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
464 return getstatusoutput(
"echo '%s' | %s -separator @@@ %s" % (query, sqlite, database))
468 """ so far we will use the current dir to search in """
471 print 'full path =', os.path.abspath(path)
473 files = os.listdir(path)
475 test_timeSizeDirs = re.compile(
"_TimeSize$", re.IGNORECASE)
476 timesize_dirs = [os.path.join(path, f)
for f
in files
if test_timeSizeDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
478 for timesize_dir
in timesize_dirs:
484 """ so far we will use the current dir to search in """
487 print 'full path =', os.path.abspath(path)
489 files = os.listdir(path)
491 test_MemcheckDirs = re.compile(
"_Memcheck(.*)$", re.IGNORECASE)
492 memcheck_dirs = [os.path.join(path, f)
for f
in files
if test_MemcheckDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
494 for memcheck_dir
in memcheck_dirs:
500 """ so far we will use the current dir to search in """
503 print 'full path =', os.path.abspath(path)
505 files = os.listdir(path)
507 test_IgProfDirs = re.compile(
"_IgProf(.*)$", re.IGNORECASE)
508 igprof_dirs = [os.path.join(path, f)
for f
in files
if test_IgProfDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
510 for igprof_dir
in igprof_dirs:
515 """ Exports the sequences to XML Doc """
517 env_cmssw_version = os.environ[
"CMSSW_VERSION"]
519 print "<<<<< ====== Error: cannot get CMSSW version [just integrity check for sequences]. \
520 Is the CMSSW environment initialized? (use cmsenv) ==== >>>>"
521 env_cmssw_version =
None
523 print " ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" %(env_cmssw_version, release)
528 if __name__ ==
"__main__":
536 """ print usage(sys.argv)
538 print "The version was not provided explicitly, will try to get one from SimulationCandles file """
541 # Export the metadata from cmsPerfSuite.log (in current working directory!)
542 print "Parsing cmsPerfSuite.log: getting all the metadata concerning the run"
543 p = parserPerfsuiteMetadata(os.getcwd())
544 run_info = p.parseAll()
546 print "Loading Sequences and Event-Content(s). Please wait..."
549 EventContents_OK = False
553 import Validation.Performance.ModuleToSequenceAssign as ModuleToSequenceAssign
558 import Validation.Performance.parseEventContent as parseEventContent
559 EventContents_OK = True
563 print "Parsing TimeSize report"
564 # Search for TimeSize files: EdmSize, TimingReport
565 searchTimeSizeFiles(run_info)
566 print "Parsing IgProf report"
567 # Search for IgProf files
568 searchIgProfFiles(run_info)
569 print "Parsing Memcheck report"
570 # Search for Memcheck files
571 searchMemcheckFiles(run_info)
574 print "Exporting sequences and event-content rules"
576 """ for testing on laptop we have no CMSSW
"""
577 # export sequences (for currently loaded CMSSW)
582 # export event's content rules
583 eventContentRules = parseEventContent.getTxtEventContentRules()
584 cmssw_exportdb_xml.exportECRules(xmldoc, eventContentRules)
587 cmssw_exportdb_xml.exportRunInfo(xmldoc, run_info, release = release)
588 #save the XML file, TODO: change fileName after introducing the JobID
590 now = datetime.datetime.now()
591 #Changing slightly the XML filename format
592 #FIXME: review this convention and archive the xml in a separate CASTOR xml directory for quick recovery of DB...
593 file_name = "%s___%s___%s___%s___%s___%s___%s.xml" % (release, "_".join(steps.keys()), "_".join(candles.keys()), "_".join(pileups.keys()),event_content,conditions,now.isoformat())
594 print "Writing the output to: %s " % file_name
596 write_xml(xmldoc, output_dir, file_name) #change this function to be able to handle directories in remote machines (via tar pipes for now could always revert to rsync later).
597 #NB write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def getJobID_fromIgProfLogName
def read_SimulationCandles
def get_modules_sequences_relationships
def read_ConfigurationFromSimulationCandles
static std::string join(char **cmd)
def assign_event_content_for_product
def getJobID_fromMemcheckLogName