00001
00002
00003 import sys, os, re
00004 import getopt
00005 from Validation.Performance.parserTimingReport import *
00006 from Validation.Performance.cmssw_exportdb_xml import *
00007 import Validation.Performance.cmssw_exportdb_xml as cmssw_exportdb_xml
00008 from Validation.Performance.parserPerfsuiteMetadata import parserPerfsuiteMetadata
00009
00010 from Validation.Performance.FileNamesHelper import *
00011 import Validation.Performance.parserEdmSize as parserEdmSize
00012
# Set to True when CMSSW is NOT available (e.g. on a testing machine): every
# step that needs the CMSSW python modules is then skipped.  Leave it False
# when CMSSW is available.
_TEST_RUN = False

# ---- global variables ----

# Matches file names that end in "TimingReport.log" (case-insensitive).  The
# dot is escaped so that it only matches a literal '.'.
test_timing_report_log = re.compile(r"TimingReport\.log$", re.IGNORECASE)

# XML document into which everything is exported.
xmldoc = minidom.Document()

# CMSSW release being harvested; assigned in the __main__ section and, when
# still empty, by process_timesize_dir().
release = None

# Steps, candles and pile-up types met while processing the timing logs.
# Only the keys are used (to build the output file name).
steps = {}
candles = {}
pileups = {}

# Rebound by process_timesize_dir() for every timing log it examines and read
# back in the __main__ section to build the output file name.  They are
# defined here so that a run in which no timing log is found does not end in
# a NameError.
event_content = None
conditions = None
00025
def usage(argv):
    """Return the command line help text; argv[0] is used as the script name."""
    help_template = """
Usage: %(script)s [-v cmssw_version] [--version=cmssw_version]

if the cmssw version is in the system's environment (after running cmsenv):
$ %(script)s

otherwise one must specify the cmssw version:
$ %(script)s --version=CMSSW_3_2_0
$ %(script)s -v CMSSW_3_2_0

"""
    # Only the "script" placeholder appears in the template.
    return help_template % {"script": argv[0]}
00039
def get_params(argv):
    """
    Return the CMSSW version to use and the directory for the XML output.

    The version is taken, in order of precedence, from:
        * the -v / --version command line option, or
        * the CMSSW_VERSION environment variable;
    it is None when neither is available.

    The output directory is taken from the --outdir option and defaults to
    "PerfSuiteDBData".

    A command line that getopt cannot parse is reported on stdout and then
    ignored as a whole, so the fallbacks described above apply.

    argv -- the full argument vector (argv[0] is the script name)

    Returns the tuple (version, xml_dir).
    """
    version = None
    xml_dir = "PerfSuiteDBData"

    # First try the command line ...
    try:
        opts, _ = getopt.getopt(argv[1:], "v:", ["version=", "outdir="])
    except getopt.GetoptError as e:
        print(e)
        # Without this the loop below would fail with a NameError.
        opts = []

    for opt, arg in opts:
        if opt in ("-v", "--version"):
            version = arg
        if opt == "--outdir":
            xml_dir = arg

    # ... and if that gave nothing, fall back to the environment.
    if not version:
        version = os.environ.get("CMSSW_VERSION", version)

    return (version, xml_dir)
00076
def _eventContent_DEBUG(edm_report):
    """
    Print, for every event content, the products that belong to it.

    Informational only: nothing is returned and edm_report is not modified.
    Nothing is printed when _TEST_RUN is set, because the event-content
    lookup (parseEventContent) is only loaded when it is not.

    edm_report -- iterable of product dictionaries; each one is passed to
                  parseEventContent.List_ECs_forProduct and must provide the
                  keys "cpp_type", "module_name" and "module_label"
    """
    if _TEST_RUN:
        return

    # Group the products by the event contents they are part of.
    EC_count = {}
    for prod in edm_report:
        for ec in parseEventContent.List_ECs_forProduct(prod):
            EC_count.setdefault(ec, []).append(prod)

    # Print the statistics followed by the list of products.
    for (ec, prods) in EC_count.items():
        print("==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods)))
        print("\n *".join(["%(cpp_type)s_%(module_name)s_%(module_label)s" % prod for prod in prods]))
00093
00094
def assign_event_content_for_product(product):
    """
    Add the event-content relationship to a product and return the product.

    Unless _TEST_RUN is set, product["event_content"] is set to the
    comma-separated result of parseEventContent.List_ECs_forProduct(product).
    The dictionary is modified in place and returned.
    """
    if _TEST_RUN:
        return product

    ec_names = parseEventContent.List_ECs_forProduct(product)
    product["event_content"] = ",".join(ec_names)
    return product
00101
00102
def get_modules_sequences_relationships():
    """
    Return the sequence -> modules relationships as a list of dictionaries.

    Each entry has the form {"name": <sequence>, "modules": <comma-joined
    modules>} and is built from the second item returned by
    ModuleToSequenceAssign.assignModulesToSeqs().
    """
    # The first item of the pair is not needed here.
    _, modules_by_sequence = ModuleToSequenceAssign.assignModulesToSeqs()

    relationships = []
    for seq_name, module_names in modules_by_sequence.items():
        relationships.append({"name": seq_name, "modules": ",".join(module_names)})
    return relationships
00106
00107
def exportTimeSizeJob(path, timeSizeReport, runinfo):
    """
    Attach a TimeSize job report to the matching test result in runinfo.

    The candle name is derived from the last component of path with
    "_TimeSize" and "_PU" removed.  The first entry of
    runinfo['TestResults']['TimeSize'] whose 'candle' equals that name and
    whose 'pileup_type', 'conditions' and 'event_content' equal those of the
    job ID gets the report appended to its 'jobs' list (created on demand).
    The step is not part of the comparison.

    When no entry matches (or there are no TimeSize results at all) a notice
    is printed and the report is appended to runinfo['unrecognized_jobs'],
    which is created if it does not exist yet.

    path           -- the *_TimeSize directory the report comes from
    timeSizeReport -- dictionary holding at least the key "jobID"
    runinfo        -- run metadata dictionary; modified in place
    """
    candleLong = os.path.split(path)[1].replace("_TimeSize", "").replace("_PU", "")
    jobID = timeSizeReport["jobID"]
    print(candleLong)

    # Search for the test result this job belongs to.
    compared_fields = ("pileup_type", "conditions", "event_content")
    for result in runinfo['TestResults'].get('TimeSize', []):
        if result['candle'] == candleLong and all(jobID[field] == result[field] for field in compared_fields):
            result.setdefault("jobs", []).append(timeSizeReport)
            return

    print("============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== ")
    print("JOB ID: %s " % str(jobID))
    print(" ====================== ")
    runinfo.setdefault('unrecognized_jobs', []).append(timeSizeReport)
00134
00135
00136
def process_timesize_dir(path, runinfo):
    """
    Process every TimingReport log found directly inside a *_TimeSize directory.

    If the global release is not set yet it is read from the SimulationCandles
    file in path (read_SimulationCandles).

    For every regular file in path whose name matches test_timing_report_log:
        * the job ID (candle, step, pileup_type, conditions, event_content)
          is derived from the file name; the job is discarded when any field
          other than pileup_type is empty,
        * the number of events, the timing log, the ROOT file size and the
          EdmSize report are loaded,
        * step, candle and pile-up type are recorded in the global steps,
          candles and pileups dictionaries,
        * the assembled report is handed over to exportTimeSizeJob().

    Side effects: the globals event_content and conditions are rebound for
    every log examined (discarded ones included), so afterwards they hold the
    values of the last log.

    path    -- the *_TimeSize directory to process
    runinfo -- run metadata dictionary, updated through exportTimeSizeJob()

    Raises Exception when the release can be determined neither explicitly
    nor from the SimulationCandles file.
    """
    global release, event_content, conditions

    # If the release is not provided explicitly take it from the
    # SimulationCandles file.
    if not release:
        release = read_SimulationCandles(path)
        print("release from simulation candles: %s" % release)

    if not release:
        raise Exception("the release was not found!")

    # Collect the TimingReport log files (full paths).
    timing_report_files = [
        os.path.join(path, f) for f in os.listdir(path)
        if test_timing_report_log.search(f)
        and os.path.isfile(os.path.join(path, f))
    ]

    for timelog_f in timing_report_files:
        print("\nProcessing file: %s" % timelog_f)
        print("------- ")

        # timelog_f already contains the directory, so it is passed as is.
        jobID = getJobID_fromTimeReportLogName(timelog_f)
        print("jobID: %s" % str(jobID))
        (candle, step, pileup_type, conditions, event_content) = jobID
        jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
        print("Dictionary based jobID %s: " % str(jobID))

        # All fields except pileup_type are mandatory: discard the job when
        # one of them is empty.
        discard = any(key != "pileup_type" and not value for key, value in jobID.items())
        if discard:
            print(" ====================== The job HAS BEEN DISCARDED =============== ")
            print(" NOT ALL DATA WAS AVAILABLE ")
            print(" JOB ID = %s " % str(jobID))
            print(" ======================= end ===================================== ")
            continue

        num_events = read_ConfigurationFromSimulationCandles(path = path, step = step, is_pileup = pileup_type)["num_events"]

        (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

        mod_timelog = processModuleTimeLogData(mod_timelog, groupBy = "module_name")
        print("Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog))

        # Remember what was seen; used later to build the output file name.
        steps[step] = 1
        candles[candle] = 1
        if pileup_type == "":
            pileups["NoPileUp"] = 1
        else:
            pileups[pileup_type] = 1

        root_file_size = getRootFileSize(path = path, candle = candle, step = step)

        edm_report = parserEdmSize.getEdmReport(path = path, candle = candle, step = step)
        if edm_report != False:
            try:
                # Add the event content data.  A real list is built (not a
                # lazy map) because the report is traversed again below and
                # stored in the job report.
                edm_report = [assign_event_content_for_product(product) for product in edm_report]
                _eventContent_DEBUG(edm_report)
            except Exception as e:
                # Best effort: a failure here must not lose the timing data.
                print(e)

        timeSizeReport = {
            "jobID": jobID,
            "release": release,
            "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
            "metadata": {"root_file_size": root_file_size, "num_events": num_events},
            "edmSize_result": edm_report,
        }

        # The actual XML export is deferred: the report is stored in runinfo.
        exportTimeSizeJob(path, timeSizeReport, runinfo)
00218
00219
def searchTimeSizeFiles(runinfo):
    """
    Process every *_TimeSize directory of the current working directory.

    So far only the current directory is searched.  Each directory entry
    whose name ends in "_TimeSize" (case-insensitive) and which is a
    directory is passed to process_timesize_dir() together with runinfo.
    """
    path = os.getcwd()

    print("full path = %s" % os.path.abspath(path))

    test_timeSizeDirs = re.compile("_TimeSize$", re.IGNORECASE)
    timesize_dirs = [
        os.path.join(path, f) for f in os.listdir(path)
        if test_timeSizeDirs.search(f) and os.path.isdir(os.path.join(path, f))
    ]

    for timesize_dir in timesize_dirs:
        process_timesize_dir(timesize_dir, runinfo)
00234
def exportSequences():
    """
    Export the sequences to the XML document.

    The sequences come from get_modules_sequences_relationships() and are
    written to the global xmldoc for the global release.  CMSSW_VERSION from
    the environment is used for the printed messages only; when it is missing
    an error is printed and the export is attempted anyway.
    """
    try:
        env_cmssw_version = os.environ["CMSSW_VERSION"]
    except KeyError:
        print("<<<<< ====== Error: cannot get CMSSW version [just integrity check for sequences]. "
              "Is the CMSSW environment initialized? (use cmsenv) ==== >>>>")
        env_cmssw_version = None

    print(" ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" % (env_cmssw_version, release))
    xml_export_Sequences(xml_doc = xmldoc, sequences = get_modules_sequences_relationships(), release = release)
00246
00247
00248
if __name__ == "__main__":
    # Harvest the performance suite results found in the current working
    # directory and write them to a single XML file.

    # Release from -v/--version or $CMSSW_VERSION, output directory from
    # --outdir.
    (release, output_dir) = get_params(sys.argv)

    if not release:
        # Not fatal: process_timesize_dir() tries the SimulationCandles file.
        print("The version was not provided explicitly, will try to get one from SimulationCandles file ")

    # Export the metadata from cmsPerfSuite.log (in current working directory!)
    print("Parsing cmsPerfSuite.log: getting all the metadata concerning the run")
    p = parserPerfsuiteMetadata(os.getcwd())
    run_info = p.parseAll()

    print("Loading Sequences and Event-Content(s). Please wait...")

    Sequences_OK = False
    EventContents_OK = False

    if not _TEST_RUN:
        # Both modules need CMSSW.  A failed import is reported and the
        # corresponding export step is skipped further down.
        try:
            import Validation.Performance.ModuleToSequenceAssign as ModuleToSequenceAssign
            Sequences_OK = True
        except Exception as e:
            print(e)
        try:
            import Validation.Performance.parseEventContent as parseEventContent
            EventContents_OK = True
        except Exception as e:
            print(e)

    print("Parsing TimeSize report")
    # Search for TimeSize files: EdmSize, TimingReport
    searchTimeSizeFiles(run_info)

    print("Exporting sequences and event-content rules")
    if not _TEST_RUN:
        # When testing on a laptop there is no CMSSW.
        # export sequences (for currently loaded CMSSW)
        if Sequences_OK:
            exportSequences()

        if EventContents_OK:
            # export event's content rules
            eventContentRules = parseEventContent.getTxtEventContentRules()
            cmssw_exportdb_xml.exportECRules(xmldoc, eventContentRules)

    cmssw_exportdb_xml.exportRunInfo(xmldoc, run_info, release = release)

    # Save the XML file. TODO: change fileName after introducing the JobID
    import datetime
    now = datetime.datetime.now()
    # FIXME: review this file name convention and archive the xml in a separate CASTOR xml directory for quick recovery of DB...
    # event_content and conditions are the module globals last set by
    # process_timesize_dir().
    file_name = "%s___%s___%s___%s___%s___%s___%s.xml" % (release, "_".join(steps.keys()), "_".join(candles.keys()), "_".join(pileups.keys()), event_content, conditions, now.isoformat())
    print("Writing the output to: %s " % file_name)

    # TODO: change write_xml to be able to handle directories in remote
    # machines (via tar pipes for now, could always revert to rsync later).
    # NB write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py
    write_xml(xmldoc, output_dir, file_name)