8 from Validation.Performance.parserPerfsuiteMetadata
import parserPerfsuiteMetadata
11 import Validation.Performance.parserEdmSize
as parserEdmSize
13 """ Indicates whether CMSSW is available [use False] or not [use True]. On our testing machine it is not available. """
16 """ global variables """
17 test_timing_report_log = re.compile(
"TimingReport.log$", re.IGNORECASE)
20 xmldoc = minidom.Document()
29 Usage: %(script)s [-v cmssw_version] [--version=cmssw_version]
31 if the cmssw version is in the system's environment (after running cmsenv):
34 otherwise one must specify the cmssw version:
35 $ %(script)s --version=CMSSW_3_2_0
36 $ %(script)s -v CMSSW_3_2_0
42 Returns the version of CMSSW to be used, which is taken from:
43 * command line parameter or
44 * environment variable
45 in case of error returns None
47 And also the directory to put the xml files to: if none --> returns ""
50 """ try to get the version for command line argument """
57 xml_dir=
"PerfSuiteDBData"
59 opts, args = getopt.getopt(argv[1:],
"v:", [
"version=",
"outdir="])
60 except getopt.GetoptError, e:
63 if opt
in (
"-v",
"--version"):
68 """ if not get it from environment string """
71 version = os.environ[
"CMSSW_VERSION"]
75 return (version, xml_dir)
82 for prod
in edm_report:
85 if not EC_count.has_key(ec):
89 for (ec, prods)
in EC_count.items():
90 print "==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods))
92 print "\n *".
join([
"%(cpp_type)s_%(module_name)s_%(module_label)s" % prod
for prod
in prods])
96 """ returns modified product by adding the event content relationship """
105 return [{
"name": seq,
"modules":
",".
join(modules)}
for (seq, modules)
in sequenceWithModulesString.items()]
109 candleLong = os.path.split(path)[1].
replace(
"_TimeSize",
"").
replace(
"_PU",
"")
110 jobID = timeSizeReport[
"jobID"]
115 if runinfo[
'TestResults'].has_key(
'TimeSize'):
116 for result
in runinfo[
'TestResults'][
'TimeSize']:
118 """ If this is the testResult which fits TimeSize job """
121 if result[
'candle'] == candleLong
and jobID[
"pileup_type"] == result[
'pileup_type']
and jobID[
"conditions"] == result[
'conditions']
and jobID[
"event_content"] == result[
'event_content']:
123 if not result.has_key(
"jobs"):
125 result[
'jobs'].
append(timeSizeReport)
130 print "============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== "
131 print "JOB ID: %s " % str(jobID)
132 print " ====================== "
133 runinfo[
'unrecognized_jobs'].
append(timeSizeReport)
138 global release,event_content,conditions
139 """ if the release is not provided explicitly we take it from the Simulation candles file """
142 release = release_fromlogfile
143 print "release from simulation candles: %s" % release
147 raise Exception(
"the release was not found!")
150 """ process the TimingReport log files """
153 files = os.listdir(path)
154 timing_report_files = [os.path.join(path, f)
for f
in files
155 if test_timing_report_log.search(f)
156 and os.path.isfile(os.path.join(path, f)) ]
159 for timelog_f
in timing_report_files:
160 print "\nProcessing file: %s" % timelog_f
164 print "jobID: %s" % str(jobID)
165 (candle, step, pileup_type, conditions, event_content) = jobID
166 jobID =
dict(zip((
"candle",
"step",
"pileup_type",
"conditions",
"event_content"), jobID))
167 print "Dictionary based jobID %s: " % str(jobID)
170 discard = len([key
for key, value
in jobID.items()
if key !=
"pileup_type" and not value])
172 print " ====================== The job HAS BEEN DISCARDED =============== "
173 print " NOT ALL DATA WAS AVAILABLE "
174 print " JOB ID = %s " % str(jobID)
175 print " ======================= end ===================================== "
180 (mod_timelog, evt_timelog, rss_data, vsize_data) =
loadTimeLog(timelog_f)
183 print "Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog)
189 pileups[
"NoPileUp"]=1
191 pileups[pileup_type] = 1
194 root_file_size =
getRootFileSize(path = path, candle = candle, step = step)
198 if edm_report !=
False:
201 edm_report =
map(assign_event_content_for_product, edm_report)
211 "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
212 "metadata": {
"root_file_size": root_file_size,
"num_events": num_events},
213 "edmSize_result": edm_report
221 """ so far we will use the current dir to search in """
224 print 'full path =', os.path.abspath(path)
226 files = os.listdir(path)
228 test_timeSizeDirs = re.compile(
"_TimeSize$", re.IGNORECASE)
229 timesize_dirs = [os.path.join(path, f)
for f
in files
if test_timeSizeDirs.search(f)
and os.path.isdir(os.path.join(path, f))]
231 for timesize_dir
in timesize_dirs:
236 """ Exports the sequences to XML Doc """
238 env_cmssw_version = os.environ[
"CMSSW_VERSION"]
240 print "<<<<< ====== Error: cannot get CMSSW version [just integrity check for sequences]. \
241 Is the CMSSW environment initialized? (use cmsenv) ==== >>>>"
242 env_cmssw_version =
None
244 print " ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" %(env_cmssw_version, release)
249 if __name__ ==
"__main__":
257 """ print usage(sys.argv)
259 print "The version was not provided explicitly, will try to get one from SimulationCandles file """
262 # Export the metadata from cmsPerfSuite.log (in current working directory!)
263 print "Parsing cmsPerfSuite.log: getting all the metadata concerning the run"
264 p = parserPerfsuiteMetadata(os.getcwd())
265 run_info = p.parseAll()
267 print "Loading Sequences and Event-Content(s). Please wait..."
270 EventContents_OK = False
274 import Validation.Performance.ModuleToSequenceAssign as ModuleToSequenceAssign
279 import Validation.Performance.parseEventContent as parseEventContent
280 EventContents_OK = True
284 print "Parsing TimeSize report"
285 # Search for TimeSize files: EdmSize, TimingReport
286 searchTimeSizeFiles(run_info)
289 print "Exporting sequences and event-content rules"
291 """ for testing on laptop we have no CMSSW
"""
292 # export sequences (for currently loaded CMSSW)
297 # export event's content rules
298 eventContentRules = parseEventContent.getTxtEventContentRules()
299 cmssw_exportdb_xml.exportECRules(xmldoc, eventContentRules)
302 cmssw_exportdb_xml.exportRunInfo(xmldoc, run_info, release = release)
303 #save the XML file, TODO: change fileName after introducing the JobID
305 now = datetime.datetime.now()
306 #Changing slightly the XML filename format
307 #FIXME: review this convention and archive the xml in a separate CASTOR xml directory for quick recovery of DB...
308 file_name = "%s___%s___%s___%s___%s___%s___%s.xml" % (release, "_".join(steps.keys()), "_".join(candles.keys()), "_".join(pileups.keys()),event_content,conditions,now.isoformat())
309 print "Writing the output to: %s " % file_name
311 write_xml(xmldoc, output_dir, file_name) #change this function to be able to handle directories in remote machines (via tar pipes for now could always revert to rsync later).
312 #NB write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py
def getJobID_fromTimeReportLogName
def processModuleTimeLogData
mod_data["stats"] =calc_MinMaxAvgRMS(f_time = lambda x: x["time"], f_evt_num = lambda x: x["event_num...
def read_SimulationCandles
def get_modules_sequences_relationships
def read_ConfigurationFromSimulationCandles
static std::string join(char **cmd)
def assign_event_content_for_product