import os, sys, re, getopt, glob, datetime
from xml.dom import minidom

from Validation.Performance.parserPerfsuiteMetadata import parserPerfsuiteMetadata
import Validation.Performance.parserEdmSize as parserEdmSize
import Validation.Performance.cmssw_exportdb_xml as cmssw_exportdb_xml
from Validation.Performance.cmssw_exportdb_xml import write_xml
from commands import getstatusoutput
16 """ indicates whether the CMSSW is [use False] available or not. on our testing machine it's not [use True] """
19 """ global variables """
20 test_timing_report_log = re.compile(
"TimingReport.log$", re.IGNORECASE)
21 test_igprof_report_log = re.compile(
"^(.*)(IgProfMem|IgProfPerf)\.gz", re.IGNORECASE)
22 test_memcheck_report_log = re.compile(
"^(.*)memcheck_vlgd.xml", re.IGNORECASE)
25 xmldoc = minidom.Document()
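# A quick sanity sketch of what these patterns are meant to match; the file
# names below are hypothetical examples, not taken from a real run:
#
#   >>> bool(test_timing_report_log.search("MinBias_TimingReport.log"))
#   True
#   >>> bool(test_igprof_report_log.search("MinBias_IgProfMem.gz"))
#   True
#   >>> bool(test_memcheck_report_log.search("MinBias_memcheck_vlgd.xml"))
#   True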
Usage: %(script)s [-v cmssw_version] [--version=cmssw_version]

If the CMSSW version is in the system's environment (after running cmsenv):
$ %(script)s

Otherwise one must specify the CMSSW version:
$ %(script)s --version=CMSSW_3_2_0
$ %(script)s -v CMSSW_3_2_0
Returns the version of CMSSW to be used, taken from:
* the command line parameter, or
* the environment variable.
In case of error, returns None.

Also returns the directory to put the XML files into; if none was given, returns "".
55 """ try to get the version for command line argument """
version = None
xml_dir = "PerfSuiteDBData"
try:
    opts, args = getopt.getopt(argv[1:], "v:", ["version=", "outdir="])
except getopt.GetoptError as e:
    print e
for opt, arg in opts:
    if opt in ("-v", "--version"):
        version = arg
    # ...
""" if not, get it from the environment string """
if not version:
    version = os.environ["CMSSW_VERSION"]

return (version, xml_dir)
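# Usage sketch (the helper name and argv contents are hypothetical):
#   >>> get_params(["harvest.py", "-v", "CMSSW_3_2_0"])
#   ('CMSSW_3_2_0', 'PerfSuiteDBData')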
for prod in edm_report:
    # ...
    if ec not in EC_count:
        EC_count[ec] = []
    # ...

for (ec, prods) in EC_count.items():
    print "==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods))
    print "\n *".join(["%(cpp_type)s_%(module_name)s_%(module_label)s" % prod for prod in prods])
101 """ returns modified product by adding the event content relationship """
110 return [{
"name": seq,
"modules":
",".
join(modules)}
for (seq, modules)
in sequenceWithModulesString.items()]
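# Sketch of the returned shape (sequence and module names hypothetical):
#   [{"name": "RECO", "modules": "moduleA,moduleB"},
#    {"name": "HLT",  "modules": "moduleC"}]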
jobID = igProfReport["jobID"]

candleLong = os.path.split(path)[1].replace("_IgProf_Perf", "").replace("_IgProf_Mem", "").replace("_PU", "")
if igProfType in runinfo['TestResults']:
    for result in runinfo['TestResults'][igProfType]:
        if (candleLong == result["candle"] and jobID["pileup_type"] == result['pileup_type']
                and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']):
            jobID["candle"] = jobID["candle"].upper()
            if "jobs" not in result:
                result['jobs'] = []
            result['jobs'].append(igProfReport)
130 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
131 print "JOB ID: %s " % str(jobID)
132 print " ====================== "
133 runinfo[
'unrecognized_jobs'].
append(igProfReport)
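# Minimal sketch of the matching above (all field values hypothetical): a
# parsed report attaches to a run-info entry only when candle, pileup_type,
# conditions and event_content all agree, e.g.
#   result = {"candle": "MINBIAS", "pileup_type": "", "conditions": "MC_31X_V2", "event_content": "RECOSIM", ...}
#   jobID  = {"candle": "MINBIAS", "pileup_type": "", "conditions": "MC_31X_V2", "event_content": "RECOSIM", ...}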
candleLong = os.path.split(path)[1].replace("_TimeSize", "").replace("_PU", "")
jobID = timeSizeReport["jobID"]
if 'TimeSize' in runinfo['TestResults']:
    for result in runinfo['TestResults']['TimeSize']:
        """ if this is the testResult which fits the TimeSize job """
        if (result['candle'] == candleLong and jobID["pileup_type"] == result['pileup_type']
                and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']):
            if "jobs" not in result:
                result['jobs'] = []
            result['jobs'].append(timeSizeReport)
158 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
159 print "JOB ID: %s " % str(jobID)
160 print " ====================== "
161 runinfo[
'unrecognized_jobs'].
append(timeSizeReport)
candleLong = os.path.split(path)[1].replace("_Memcheck", "").replace("_PU", "")
jobID = MemcheckReport["jobID"]
if 'Memcheck' in runinfo['TestResults']:
    for result in runinfo['TestResults']['Memcheck']:
        """ if this is the testResult which fits the Memcheck job """
        if (result['candle'] == candleLong and jobID["pileup_type"] == result['pileup_type']
                and jobID["conditions"] == result['conditions'] and jobID["event_content"] == result['event_content']):
            if "jobs" not in result:
                result['jobs'] = []
            result['jobs'].append(MemcheckReport)
186 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
187 print "JOB ID: %s " % str(jobID)
188 print " ====================== "
189 runinfo[
'unrecognized_jobs'].
append(MemcheckReport)
global release, event_content, conditions
""" if the release is not provided explicitly we take it from the SimulationCandles file """
if not release:
    release = release_fromlogfile
    print "release from simulation candles: %s" % release
if not release:
    raise Exception("the release was not found!")
204 """ process the TimingReport log files """
207 files = os.listdir(path)
208 timing_report_files = [os.path.join(path, f)
for f
in files
209 if test_timing_report_log.search(f)
210 and os.path.isfile(os.path.join(path, f)) ]
for timelog_f in timing_report_files:
    print "\nProcessing file: %s" % timelog_f
    # ...
    print "jobID: %s" % str(jobID)
    (candle, step, pileup_type, conditions, event_content) = jobID
    jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
    print "Dictionary-based jobID: %s" % str(jobID)
    discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
    if discard:
        print " ====================== The job HAS BEEN DISCARDED =============== "
        print " NOT ALL DATA WAS AVAILABLE "
        print " JOB ID = %s " % str(jobID)
        print " ======================= end ===================================== "
        continue
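    # The discard test above counts the required jobID fields (all except
    # pileup_type) that came back empty; any missing field disqualifies the job.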
    (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

    print "Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog)
    if pileup_type == "":
        pileups["NoPileUp"] = 1
    else:
        pileups[pileup_type] = 1
    root_file_size = getRootFileSize(path=path, candle=candle, step=step.replace(':', '='))
    if edm_report != False:
        try:
            edm_report = map(assign_event_content_for_product, edm_report)
            # ...
        except Exception as e:
            print e
265 "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
266 "metadata": {
"testname":
"TimeSize",
"root_file_size": root_file_size,
"num_events": num_events},
267 "edmSize_result": edm_report
global release, event_content, conditions
""" if the release is not provided explicitly we take it from the SimulationCandles file """
if not release:
    release = release_fromlogfile
    print "release from simulation candles: %s" % release
if not release:
    raise Exception("the release was not found!")
285 """ process the vlgd files """
288 files = os.listdir(path)
289 memcheck_files = [os.path.join(path, f)
for f
in files
290 if test_memcheck_report_log.search(f)
291 and os.path.isfile(os.path.join(path, f)) ]
if len(memcheck_files) == 0:
    print "No _vlgd files found!"
else:
    for file in memcheck_files:
        # ...
        (candle, step, pileup_type, conditions, event_content) = jobID
        print "jobID: %s" % str(jobID)
        jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
        print "Dictionary-based jobID: %s" % str(jobID)
        discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
        if discard:
            print " ====================== The job HAS BEEN DISCARDED =============== "
            print " NOT ALL DATA WAS AVAILABLE "
            print " JOB ID = %s " % str(jobID)
            print " ======================= end ===================================== "
            continue
        candles[candle.upper()] = 1
        if pileup_type == "":
            pileups["NoPileUp"] = 1
        else:
            pileups[pileup_type] = 1
328 "memcheck_errors": {
"error_num": memerror},
329 "metadata": {
"testname":
"Memcheck"},
globbed = glob.glob(os.path.join(path, "*memcheck_vlgd.xml"))
for f in globbed:
    cmd = "grep '<error>' " + f + " | wc -l "
    p = os.popen(cmd, 'r')
    errnum += int(p.readlines()[0])
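# An equivalent pure-Python sketch (an alternative to shelling out to grep/wc;
# note it counts occurrences rather than lines, which agrees when there is at
# most one <error> tag per line):
#
#   errnum += open(f).read().count("<error>")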
global release, event_content, conditions
""" if the release is not provided explicitly we take it from the SimulationCandles file """
if not release:
    release = release_fromlogfile
    print "release from simulation candles: %s" % release
if not release:
    raise Exception("the release was not found!")
361 """ process the IgProf sql3 files """
364 files = os.listdir(path)
365 igprof_files = [os.path.join(path, f)
for f
in files
366 if test_igprof_report_log.search(f)
367 and os.path.isfile(os.path.join(path, f)) ]
if len(igprof_files) == 0:
    print "No igprof files found!"
else:
    for file in igprof_files:
        # ...
        (candle, step, pileup_type, conditions, event_content) = jobID
        print "jobID: %s" % str(jobID)
        jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
        print "Dictionary-based jobID: %s" % str(jobID)
        discard = len([key for key, value in jobID.items() if key != "pileup_type" and not value])
        if discard:
            print " ====================== The job HAS BEEN DISCARDED =============== "
            print " NOT ALL DATA WAS AVAILABLE "
            print " JOB ID = %s " % str(jobID)
            print " ======================= end ===================================== "
            continue
        candles[candle.upper()] = 1
        if pileup_type == "":
            pileups["NoPileUp"] = 1
        else:
            pileups[pileup_type] = 1
407 "igprof_result": igs,
408 "metadata": {
"testname": igProfType},
globbed = glob.glob(os.path.join(path, "*.sql3"))
for f in globbed:
    # ...
    cumCounts, cumCalls = profileInfo
    dump, architecture, release, rest = f.rsplit("/", 3)
    candle, sequence, pileup, conditions, process, counterType, events = rest.split("___")
    events = events.replace(".sql3", "")
    igresult.append({"counter_type": counterType, "event": events, "cumcounts": cumCounts, "cumcalls": cumCalls})
for ig in igresult:
    if 'diff' in ig['event']:
        eventLast, eventOne = ig['event'].split('_diff_')
        for part in igresult:
            if part['counter_type'] == ig['counter_type'] and part['event'] == eventOne:
                cumcountsOne = part['cumcounts']
                cumcallsOne = part['cumcalls']
            if part['counter_type'] == ig['counter_type'] and part['event'] == eventLast:
                cumcountsLast = part['cumcounts']
                cumcallsLast = part['cumcalls']
        ig['cumcounts'] = cumcountsLast - cumcountsOne
        ig['cumcalls'] = cumcallsLast - cumcallsOne
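# Worked sketch of the diff entry (numbers hypothetical): for an entry with
# event "50_diff_1", the loop finds the absolute entries for events "1" and
# "50"; with cumcounts 100 and 900, the diff entry stores 900 - 100 = 800,
# i.e. the cost attributed to events 2 through 50.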
summary_query = """SELECT counter, total_count, total_freq, tick_period
                   FROM summary;"""
error, output = doQuery(summary_query, database)
if error or not output or output.count("\n") > 1:
    return None
counter, total_count, total_freq, tick_period = output.split("@@@")
if counter == "PERF_TICKS":
    return float(tick_period) * float(total_count), int(total_freq)
else:
    return int(total_count), int(total_freq)
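# doQuery (below) invokes sqlite with "-separator @@@", so the single summary
# row arrives as e.g. "PERF_TICKS@@@12345@@@678@@@0.01" (values hypothetical),
# which the split("@@@") above unpacks into the four columns.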
if os.path.exists("/usr/bin/sqlite3"):
    sqlite = "/usr/bin/sqlite3"
else:
    sqlite = "/afs/cern.ch/user/e/eulisse/www/bin/sqlite"
return getstatusoutput("echo '%s' | %s -separator @@@ %s" % (query, sqlite, database))
468 """ so far we will use the current dir to search in """
471 print 'full path =', os.path.abspath(path)
473 files = os.listdir(path)
475 test_timeSizeDirs = re.compile(
"_TimeSize$", re.IGNORECASE)
476 timesize_dirs = [os.path.join(path, f)
for f
in files
if test_timeSizeDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
478 for timesize_dir
in timesize_dirs:
484 """ so far we will use the current dir to search in """
487 print 'full path =', os.path.abspath(path)
489 files = os.listdir(path)
491 test_MemcheckDirs = re.compile(
"_Memcheck(.*)$", re.IGNORECASE)
492 memcheck_dirs = [os.path.join(path, f)
for f
in files
if test_MemcheckDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
494 for memcheck_dir
in memcheck_dirs:
500 """ so far we will use the current dir to search in """
503 print 'full path =', os.path.abspath(path)
505 files = os.listdir(path)
507 test_IgProfDirs = re.compile(
"_IgProf(.*)$", re.IGNORECASE)
508 igprof_dirs = [os.path.join(path, f)
for f
in files
if test_IgProfDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
510 for igprof_dir
in igprof_dirs:
515 """ Exports the sequences to XML Doc """
517 env_cmssw_version = os.environ[
"CMSSW_VERSION"]
519 print "<<<<< ====== Error: cannot get CMSSW version [just integrity check for sequences]. \
520 Is the CMSSW environment initialized? (use cmsenv) ==== >>>>"
521 env_cmssw_version =
None
523 print " ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" %(env_cmssw_version, release)
if __name__ == "__main__":
    # ...
    # print usage(sys.argv)
    # sys.exit(2)
    print "The version was not provided explicitly; will try to get one from the SimulationCandles file"
    # Export the metadata from cmsPerfSuite.log (in the current working directory!)
    print "Parsing cmsPerfSuite.log: getting all the metadata concerning the run"
    p = parserPerfsuiteMetadata(os.getcwd())
    run_info = p.parseAll()

    print "Loading Sequences and Event-Content(s). Please wait..."
    EventContents_OK = False
    try:
        import Validation.Performance.ModuleToSequenceAssign as ModuleToSequenceAssign
    except Exception as e:
        print e
    try:
        import Validation.Performance.parseEventContent as parseEventContent
        EventContents_OK = True
    except Exception as e:
        print e
563 print "Parsing TimeSize report"
564 # Search for TimeSize files: EdmSize, TimingReport
565 searchTimeSizeFiles(run_info)
566 print "Parsing IgProf report"
567 # Search for IgProf files
568 searchIgProfFiles(run_info)
569 print "Parsing Memcheck report"
570 # Search for Memcheck files
571 searchMemcheckFiles(run_info)
574 print "Exporting sequences and event-content rules"
576 """ for testing on laptom we have no CMSSW
"""
577 # export sequences (for currently loaded CMSSW)
582 # export event's content rules
583 eventContentRules = parseEventContent.getTxtEventContentRules()
584 cmssw_exportdb_xml.exportECRules(xmldoc, eventContentRules)
    cmssw_exportdb_xml.exportRunInfo(xmldoc, run_info, release=release)
    # save the XML file; TODO: change fileName after introducing the JobID
    now = datetime.datetime.now()
    # Changing slightly the XML filename format
    # FIXME: review this convention and archive the xml in a separate CASTOR xml directory for quick recovery of the DB...
    file_name = "%s___%s___%s___%s___%s___%s___%s.xml" % (
        release, "_".join(steps.keys()), "_".join(candles.keys()), "_".join(pileups.keys()),
        event_content, conditions, now.isoformat())
    print "Writing the output to: %s " % file_name

    write_xml(xmldoc, output_dir, file_name)
    # TODO: change write_xml to handle directories on remote machines (via tar pipes for now; we could always revert to rsync later)
    # NB: write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py
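    # Sketch of a resulting file name (all field values hypothetical):
    #   CMSSW_3_2_0___GEN-SIM___MINBIAS___NoPileUp___RECOSIM___MC_31X_V2___2009-07-01T12:00:00.xml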