Functions
def	_eventContent_DEBUG
def	assign_event_content_for_product
def	exportSequences
def	exportTimeSizeJob
def	get_modules_sequences_relationships
def	get_params
def	process_timesize_dir
def	searchTimeSizeFiles
def	usage
Variables
	_TEST_RUN = False
dictionary	candles = {}
tuple	eventContentRules = parseEventContent.getTxtEventContentRules()
	EventContents_OK = False
string	file_name = "%s___%s___%s___%s___%s___%s___%s.xml"
tuple	now = datetime.datetime.now()
tuple	p = parserPerfsuiteMetadata(os.getcwd())
dictionary	pileups = {}
	release = None
tuple	run_info = p.parseAll()
	Sequences_OK = False
dictionary	steps = {}
tuple	test_timing_report_log = re.compile("TimingReport.log$", re.IGNORECASE)
tuple	xmldoc = minidom.Document()

Function Documentation

def cmsPerfSuiteHarvest::_eventContent_DEBUG ( edm_report ) [private]

Definition at line 77 of file cmsPerfSuiteHarvest.py.

def _eventContent_DEBUG(edm_report):
    """Print, for each event content, the products that belong to it.

    Testing/information aid only; nothing is returned. The function does
    nothing when the module-level ``_TEST_RUN`` flag is set.

    edm_report -- iterable of product dictionaries. Each one is passed to
        ``parseEventContent.List_ECs_forProduct`` and must provide the
        ``cpp_type``, ``module_name`` and ``module_label`` keys that are
        used to format the listing.
    """
    if _TEST_RUN:
        return

    # Count the products in the event contents: group every product under
    # each event content it is listed in.
    products_by_ec = {}
    for prod in edm_report:
        for ec in parseEventContent.List_ECs_forProduct(prod):
            # setdefault replaces the has_key() test; has_key is deprecated
            # and no longer exists in Python 3.
            products_by_ec.setdefault(ec, []).append(prod)

    # Print out the statistics.
    for (ec, prods) in products_by_ec.items():
        print("==== %s EVENT CONTENT: have %d items, the listing is: ===" % (ec, len(prods)))
        # NOTE(review): the separator is only inserted *between* entries, so
        # the first product is printed without the " *" bullet.
        print("\n *".join(["%(cpp_type)s_%(module_name)s_%(module_label)s" % prod for prod in prods]))

def cmsPerfSuiteHarvest::assign_event_content_for_product ( product )

Returns the product after adding its event-content relationship to it.

Definition at line 95 of file cmsPerfSuiteHarvest.py.

def assign_event_content_for_product(product):
    """Add the event-content relationship to a product and return it.

    Unless the module-level ``_TEST_RUN`` flag is set, the event contents
    given by ``parseEventContent.List_ECs_forProduct`` are stored, joined
    by commas, under the ``"event_content"`` key. The dictionary is modified
    in place and the same object is returned.
    """
    if _TEST_RUN:
        # Test runs leave the product untouched.
        return product

    event_contents = parseEventContent.List_ECs_forProduct(product)
    product["event_content"] = ",".join(event_contents)
    return product

def cmsPerfSuiteHarvest::exportSequences ( )

Exports the sequences to XML Doc

Definition at line 235 of file cmsPerfSuiteHarvest.py.

def exportSequences():
    """Export the sequences to the XML document.

    ``CMSSW_VERSION`` is read from the environment purely as an integrity
    check: when it is missing, an error is printed and the export still
    goes ahead. The sequences returned by
    ``get_modules_sequences_relationships()`` are then handed to
    ``xml_export_Sequences`` together with the module-level ``xmldoc`` and
    ``release``.
    """
    try:
        env_cmssw_version = os.environ["CMSSW_VERSION"]
    except KeyError:
        # Adjacent literals are used instead of a backslash continuation
        # inside the string: the continuation made the indentation of the
        # following source line part of the printed message.
        print("<<<<<  ====== Error: cannot get CMSSW version [just integrity check for sequences]. "
              "Is the CMSSW environment initialized? (use cmsenv) ==== >>>>")
        env_cmssw_version = None

    print(" ==== exporting the sequences. loading files for currently loaded CMSSW version: %s, while the CMSSW we are currently harversting is %s ===" % (env_cmssw_version, release))
    xml_export_Sequences(xml_doc=xmldoc, sequences=get_modules_sequences_relationships(), release=release)

def cmsPerfSuiteHarvest::exportTimeSizeJob	(	path,
		timeSizeReport,
		runinfo
	)

Definition at line 108 of file cmsPerfSuiteHarvest.py.

def exportTimeSizeJob(path, timeSizeReport, runinfo):
    """Attach a TimeSize job report to the matching test result in runinfo.

    The candle name is the last component of ``path`` with the "_TimeSize"
    and "_PU" markers removed. The report is appended to the ``"jobs"`` list
    of the first ``runinfo['TestResults']['TimeSize']`` entry whose candle,
    pileup type, conditions and event content equal those of the job. If no
    entry matches, the report is appended to
    ``runinfo['unrecognized_jobs']`` (created if absent) instead.

    Nothing is returned; ``runinfo`` is modified in place.

    path -- TimeSize directory the report belongs to.
    timeSizeReport -- dictionary with a ``"jobID"`` dictionary that holds
        the ``pileup_type``, ``conditions`` and ``event_content`` keys.
    runinfo -- dictionary with a ``"TestResults"`` dictionary.
    """
    candleLong = os.path.split(path)[1].replace("_TimeSize", "").replace("_PU", "")
    jobID = timeSizeReport["jobID"]
    print(candleLong)

    # Search for a test run to which the job could belong.
    # TODO: the step is not checked when assigning because of the different
    # (long vs. short) step names; check whether this is really OK and
    # decide which step name to use later.
    found = False
    if 'TimeSize' in runinfo['TestResults']:
        for result in runinfo['TestResults']['TimeSize']:
            # Is this the test result which fits the TimeSize job?
            if (result['candle'] == candleLong
                    and jobID["pileup_type"] == result['pileup_type']
                    and jobID["conditions"] == result['conditions']
                    and jobID["event_content"] == result['event_content']):
                result.setdefault('jobs', []).append(timeSizeReport)
                found = True
                break

    if not found:
        print("============ (almost) ERROR: NOT FOUND THE ENTRY in cmsPerfSuite.log, exporting as separate entry ======== ")
        print("JOB ID: %s " % str(jobID))
        print(" ====================== ")
        # setdefault keeps a run info without the list from raising KeyError.
        runinfo.setdefault('unrecognized_jobs', []).append(timeSizeReport)

def cmsPerfSuiteHarvest::get_modules_sequences_relationships ( )

Definition at line 103 of file cmsPerfSuiteHarvest.py.

def get_modules_sequences_relationships():
    """Return the sequence/module relationships as a list of dictionaries.

    Each dictionary has a ``"name"`` entry (the sequence) and a
    ``"modules"`` entry with that sequence's modules joined by commas. The
    data comes from the second element of the pair returned by
    ``ModuleToSequenceAssign.assignModulesToSeqs()``.
    """
    # Only the second element of the returned pair is used here.
    (_sequence_with_modules, modules_by_sequence) = ModuleToSequenceAssign.assignModulesToSeqs()

    relationships = []
    for (seq, modules) in modules_by_sequence.items():
        relationships.append({"name": seq, "modules": ",".join(modules)})
    return relationships

def cmsPerfSuiteHarvest::get_params ( argv )

Returns the version of CMSSW to be used, which is taken from:
* the command-line parameter, or
* the CMSSW_VERSION environment variable;
if neither is available, None is returned.

Also returns the directory to put the XML files in; if none is given on the command line, "PerfSuiteDBData" is returned.

The version is looked up in the command-line arguments first.

Definition at line 40 of file cmsPerfSuiteHarvest.py.

def get_params(argv):
    """Return the CMSSW version and the XML output directory as a tuple.

    The version is taken from the ``-v``/``--version`` command-line option
    or, failing that, from the ``CMSSW_VERSION`` environment variable; it is
    None when neither is available. The output directory is taken from the
    ``--outdir`` option and defaults to "PerfSuiteDBData".

    An unrecognised option is reported on standard output and the defaults
    are used.

    argv -- the full argument vector; ``argv[0]`` is skipped.
    """
    version = None
    # Default: write to a local directory. A remote default such as
    # "cmsperfvm:/data/projects/conf/PerfSuiteDB/xml_dropbox" would assume a
    # change in write_xml to write to remote machines.
    # NB write_xml is in Validation/Performance/python/cmssw_exportdb_xml.py
    xml_dir = "PerfSuiteDBData"

    # Try to get the version from the command-line arguments.
    try:
        opts, _args = getopt.getopt(argv[1:], "v:", ["version=", "outdir="])
    except getopt.GetoptError as e:
        print(e)
        # Without this fallback 'opts' is never bound and the loop below
        # raises UnboundLocalError.
        opts = []

    for opt, arg in opts:
        if opt in ("-v", "--version"):
            version = arg
        elif opt == "--outdir":
            xml_dir = arg

    # If it was not given, get it from the environment (the current value is
    # kept when the variable is not set).
    if not version:
        version = os.environ.get("CMSSW_VERSION", version)

    return (version, xml_dir)

def cmsPerfSuiteHarvest::process_timesize_dir	(	path,
		runinfo
	)

Definition at line 137 of file cmsPerfSuiteHarvest.py.

def process_timesize_dir(path, runinfo):
    """Harvest every TimingReport log of one TimeSize directory.

    If the module-level ``release`` is not set, it is first read from the
    SimulationCandles file in ``path``. For every regular file in ``path``
    whose name matches ``test_timing_report_log``, the job ID, number of
    events, timing log data, root file size and EdmSize report are gathered
    into a report dictionary, which is handed to ``exportTimeSizeJob``
    together with ``runinfo``. Jobs with an empty job ID field (other than
    the pileup type) are discarded.

    Side effects: updates the module-level ``release``, ``conditions`` and
    ``event_content``, and records the steps, candles and pileup types seen
    in the module-level ``steps``, ``candles`` and ``pileups`` dictionaries.

    Raises Exception when the release cannot be determined.

    path -- the TimeSize directory to process.
    runinfo -- run information passed on to ``exportTimeSizeJob``.
    """
    global release, event_content, conditions

    # If the release is not provided explicitly, take it from the
    # SimulationCandles file.
    if not release:
        release = read_SimulationCandles(path)
        print("release from simulation candles: %s" % release)

    if not release:
        raise Exception("the release was not found!")

    # Process the TimingReport log files: get the list of regular files
    # whose name matches.
    timing_report_files = [os.path.join(path, f) for f in os.listdir(path)
                           if test_timing_report_log.search(f)
                           and os.path.isfile(os.path.join(path, f))]

    for timelog_f in timing_report_files:
        print("\nProcessing file: %s" % timelog_f)
        print("------- ")

        # timelog_f already contains the directory; joining it with path a
        # second time duplicates the directory when path is relative.
        jobID = getJobID_fromTimeReportLogName(timelog_f)
        print("jobID: %s" % str(jobID))
        # conditions and event_content are declared global above, so this
        # unpacking also updates the module-level values.
        (candle, step, pileup_type, conditions, event_content) = jobID
        jobID = dict(zip(("candle", "step", "pileup_type", "conditions", "event_content"), jobID))
        print("Dictionary based jobID %s: " % str(jobID))

        # Every jobID field except pileup_type is a key of the job and must
        # be present; otherwise the job is discarded.
        empty_fields = [key for key, value in jobID.items() if key != "pileup_type" and not value]
        if empty_fields:
            print(" ====================== The job HAS BEEN DISCARDED =============== ")
            print(" NOT ALL DATA WAS AVAILABLE ")
            print(" JOB ID = %s " % str(jobID))
            print(" ======================= end ===================================== ")
            continue

        num_events = read_ConfigurationFromSimulationCandles(path=path, step=step, is_pileup=pileup_type)["num_events"]
        # TODO: automatically detect the type of report file!
        (mod_timelog, evt_timelog, rss_data, vsize_data) = loadTimeLog(timelog_f)

        mod_timelog = processModuleTimeLogData(mod_timelog, groupBy="module_name")
        print("Number of modules grouped by (module_label+module_name): %s" % len(mod_timelog))

        # Remember what was seen, to generate the readable file name.
        steps[step] = 1
        candles[candle] = 1
        if pileup_type == "":
            pileups["NoPileUp"] = 1
        else:
            pileups[pileup_type] = 1

        # Root file size (a number).
        root_file_size = getRootFileSize(path=path, candle=candle, step=step)

        # EdmSize report; getEdmReport signals "no report" with False.
        edm_report = parserEdmSize.getEdmReport(path=path, candle=candle, step=step)
        if edm_report != False:
            try:
                # Add the event content data. A list is built explicitly so
                # the report can be iterated more than once on Python 3 too.
                edm_report = [assign_event_content_for_product(product) for product in edm_report]
                # For testing / information.
                _eventContent_DEBUG(edm_report)
            except Exception as e:
                # Best effort: a failure here is reported and the job is
                # still exported.
                print(e)

        timeSizeReport = {
            "jobID": jobID,
            "release": release,
            "timelog_result": (mod_timelog, evt_timelog, rss_data, vsize_data),
            "metadata": {"root_file_size": root_file_size, "num_events": num_events},
            "edmSize_result": edm_report,
        }

        # Export to xml: the actual export is deferred, the report is put
        # into runinfo.
        exportTimeSizeJob(path, timeSizeReport, runinfo)
#TimeSize

def cmsPerfSuiteHarvest::searchTimeSizeFiles ( runinfo )

So far, only the current working directory is searched.

Definition at line 220 of file cmsPerfSuiteHarvest.py.

def searchTimeSizeFiles(runinfo):
    """Process every TimeSize directory found in the current directory.

    So far only the current working directory is searched. Each entry whose
    name ends in "_TimeSize" (case-insensitively) and which is a directory
    is passed, together with ``runinfo``, to ``process_timesize_dir``.
    """
    cwd = os.getcwd()
    print('full path =' + ' ' + os.path.abspath(cwd))

    name_matches = re.compile("_TimeSize$", re.IGNORECASE).search

    # Collect all the directories first, then process them.
    timesize_dirs = []
    for entry in os.listdir(cwd):
        candidate = os.path.join(cwd, entry)
        if name_matches(entry) and os.path.isdir(candidate):
            timesize_dirs.append(candidate)

    for timesize_dir in timesize_dirs:
        process_timesize_dir(timesize_dir, runinfo)

def cmsPerfSuiteHarvest::usage ( argv )

Definition at line 26 of file cmsPerfSuiteHarvest.py.

def usage(argv):
    """Return the command-line help text of the script.

    The text covers the ``-v``/``--version`` and ``--outdir`` options.

    argv -- the argument vector; only ``argv[0]`` is used, as the script
        name shown in the examples.
    """
    script = argv[0]
    return """
    Usage: %(script)s [-v cmssw_version] [--version=cmssw_version] [--outdir=xml_dir]
    
    if the cmssw version is in the system's environment (after running cmsenv):
    $ %(script)s 
    
    otherwise one must specify the cmssw version:
    $ %(script)s --version=CMSSW_3_2_0
    $ %(script)s -v CMSSW_3_2_0    
    
    the xml files are written to PerfSuiteDBData unless another directory is given:
    $ %(script)s --outdir=xml_dir
    
    """ % {"script": script}