CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_9_patch3/src/Validation/Performance/scripts/cmsPerfSuite.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 import os, time, sys, re, glob, exceptions
00003 import optparse as opt
00004 import cmsRelRegress as crr
00005 from cmsPerfCommons import Candles, KeywordToCfi, CandFname, cmsDriverPileUpOption, getVerFromLog
00006 import cmsRelValCmd,cmsCpuInfo
00007 import threading #Needed in threading use for Valgrind
00008 import subprocess #Nicer subprocess management than os.popen
00009 import datetime #Used to time the running of the performance suite
00010 import pickle #Used to dump the running timing information
00011 
def _cleanup():
    """Do nothing, in place of subprocess's polling of still-active processes.

    The suite runs tests from several threads; a no-op is installed over
    subprocess._cleanup() to avoid issues when threading.
    """
    return None


# Install the no-op over the implementation that ships with subprocess.
subprocess._cleanup = _cleanup
00019 
class PerfThread(threading.Thread):
    """Thread that runs one complete performance suite.

    The keyword arguments given at construction are kept on ``self.args`` and
    forwarded unchanged to ``PerfSuite.runPerfSuite`` when the thread runs.
    """

    def __init__(self, **args):
        threading.Thread.__init__(self)
        self.args = args

    def run(self):
        """Create a PerfSuite, keep it on ``self.suite`` and run it with the stored arguments."""
        suite = PerfSuite()
        self.suite = suite
        suite.runPerfSuite(**self.args)
00030 
class PerfSuiteTimer:
    """A class defining timing objects to time the running of the various parts of the performance suite.

    The class depends on module datetime. ``duration`` is always kept equal to
    ``end - start`` (a datetime.timedelta), or None while either endpoint is
    missing.
    """

    def __init__(self, start=None):
        """Initialize the start time and set the end time to some indefinite time in the future.

        start -- datetime.datetime at which timing began, or None when it is
                 not known yet (supply it later with set_start()).
        """
        self.start = start
        self.end = datetime.datetime.max
        # Computed as end - start, the same orientation set_end() uses, and
        # tolerant of the default start=None (which used to raise TypeError).
        self.duration = self._elapsed()

    def _elapsed(self):
        """Return end - start, or None while either endpoint is missing."""
        if self.start is None or self.end is None:
            return None
        return self.end - self.start

    # Setters:
    def set_start(self, start=None):
        """Set the start time and refresh the duration accordingly."""
        self.start = start
        self.duration = self._elapsed()

    def set_end(self, end=None):
        """Set the end time and refresh the duration accordingly."""
        self.end = end
        self.duration = self._elapsed()

    # Getters
    def get_start(self):
        """Return the start time in ctime timestamp format"""
        return self.start.ctime()

    def get_end(self):
        """Return the end time in ctime timestamp format"""
        return self.end.ctime()

    def get_duration(self):
        """Return the duration between start and end as a dictionary.

        The keys 'hours', 'minutes' and 'seconds' each express the *total*
        duration in that unit, as truncated integers. The three values are
        also stored on the instance as duration_hours, duration_minutes and
        duration_seconds.

        Raises ValueError if the duration is undefined because the start or
        end time has not been set.
        """
        if self.duration is None:
            raise ValueError("duration is undefined: start and end times must both be set")
        total_seconds = self.duration.days * 86400 + self.duration.seconds
        self.duration_seconds = total_seconds
        # Floor division keeps the results integral regardless of whether "/"
        # means true division in the running interpreter.
        self.duration_minutes = total_seconds // 60
        self.duration_hours = total_seconds // 3600
        return {'hours': self.duration_hours,
                'minutes': self.duration_minutes,
                'seconds': self.duration_seconds}
00059 
00060 class PerfSuite:
00061     def __init__(self):
00062         
00063         self.ERRORS = 0
00064         #Swtiching from CASTOR to EOS (using xrdcp instead of rfcp and  root://eoscms//eos/ instead of /castor/cern.ch/
00065         #NOT YET!
00066         #FIX ME... do the migration to EOS eventually, taking care of PerFDB implications for tarball location!
00067         self._CASTOR_DIR = "/castor/cern.ch/cms/store/relval/performance/"
00068         self._dryrun   = False
00069         self._debug    = False
00070         self._unittest = False
00071         self._noexec   = False
00072         self._verbose  = True
00073         self.logh = sys.stdout
00074     
00075         #Get some environment variables to use
00076         try:
00077             self.cmssw_arch   = os.environ["SCRAM_ARCH"]
00078             self.cmssw_version= os.environ["CMSSW_VERSION"]
00079             self.host         = os.environ["HOST"]
00080             self.user              = os.environ["USER"]
00081         except KeyError:
00082             self.logh.write('Error: An environment variable either SCRAM_ARCH, CMSSW_VERSION, HOST or USER is not available.\n')
00083             self.logh.write('       Please run eval `scramv1 runtime -csh` to set your environment variables\n')
00084             self.logh.flush()
00085             sys.exit()
00086            
00087         #Scripts used by the suite:
00088         self.Scripts         =["cmsDriver.py","cmsRelvalreport.py","cmsRelvalreportInput.py","cmsScimark2"]
00089         self.AuxiliaryScripts=["cmsScimarkLaunch.csh","cmsScimarkParser.py","cmsScimarkStop.py"]
00090 
00091         
00092     #Threading the execution of IgProf, Memcheck and Callgrind using the same model used to thread the whole performance suite:
00093     #1-Define a class simpleGenReportThread() that has relevant methods needed to handle PerfTest()
00094     #2-Instantiate one with the necessary arguments to run simpleGenReport on core N
00095     #3-Execute its "run" method by starting the thread
00096     #Simplest way maybe is to keep 2 global lists:
00097     #AvailableCores
00098     #TestsToDo
00099     #PerfSuite will fill the TestsToDo list with dictionaries, to be used as keyword arguments to instantiate a relevant thread.
00100     #Once all the TestsToDo are "scheduled" into the list (FirstInLastOut buffer since we use pop()) PerfSuite will look into the
00101     #AvailableCores list and start popping cores onto which to instantiate the relevant threads, then it will start the thread,
00102     #appending it to the activePerfTestThread{},a dictionary with core as key and thread object as value, to facilitate bookkeeping.
00103     #An infinite loop will take care of checking for AvailableCores as long as there are TestsToDo and keep submitting.
00104     #In the same loop the activePerfTestThread{} will be checked for finished threads and it will re-append the relevant cpu back
00105     #to the AvailableCores list.
00106     #In the same loop, a check for the case of all cores being back in AvailableCores with no more TestsToDo will break the infinite loop
00107     #and declare the end of all tests. Otherwise (the else branch of that check), a sleep of 5 seconds delays the next iteration of the loop.
00108 
00109     def createIgVolume(self):
00110        igcommand = '/afs/cern.ch/cms/sdt/internal/scripts/requestPerfIgprofSpace.py --version ' + self.cmssw_version + ' --platform ' + self.cmssw_arch
00111        subprocess.Popen(igcommand,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
00112 
00113 
00114     class simpleGenReportThread(threading.Thread):
00115        def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs): #Passing around the perfsuite object to be able to access simpleGenReport
00116           self.cpu=cpu
00117           self.simpleGenReportArgs=simpleGenReportArgs
00118           self.perfsuiteinstance=perfsuiteinstance
00119           threading.Thread.__init__(self)
00120        def run(self):
00121           self.PerfTest=self.perfsuiteinstance.PerfTest(self.cpu,self.perfsuiteinstance,**(self.simpleGenReportArgs))
00122           self.PerfTest.runPerfTest()
00123     
00124     class PerfTest:
00125        def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs):
00126           self.cpu=cpu
00127           self.simpleGenReportArgs=simpleGenReportArgs
00128           self.perfsuiteinstance=perfsuiteinstance
00129        def runPerfTest(self):
00130 #          self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
00131 #          TimerInfo.update({self.simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add the TimeSize timer to the dictionary  
00132           if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
00133              self.perfsuiteinstance.logh.write("Launching the PILE UP %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
00134              self.PerfTestPUTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
00135              TimerInfo[self.simpleGenReportArgs['Name']].update({'PileUpTime':self.PerfTestPUTimer}) #Add the TimeSize timer to the dictionary
00136              
00137           else:
00138              self.perfsuiteinstance.logh.write("Launching the %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
00139              self.PerfTestTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
00140              TimerInfo[self.simpleGenReportArgs['Name']].update({'NoPileUpTime':self.PerfTestTimer}) #Add the TimeSize timer to the dictionary
00141           self.perfsuiteinstance.logh.flush()
00142           #Cut and paste in bulk, should see if this works...
00143           self.perfsuiteinstance.printDate()
00144           self.perfsuiteinstance.logh.flush()
00145           self.exitcode=self.perfsuiteinstance.simpleGenReport([self.cpu],**(self.simpleGenReportArgs)) #Returning ReportExit code
00146           #Stop the timers on the threaded PileUp and NoPileUp tests:
00147           if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
00148              self.PerfTestPUTimer.set_end(datetime.datetime.now())
00149           else:
00150              self.PerfTestTimer.set_end(datetime.datetime.now())
00151           return self.exitcode
00152        
00153     #Options handling
00154     def optionParse(self,argslist=None):
00155         parser = opt.OptionParser(usage='''./cmsPerfSuite.py [options]
00156            
00157     Examples:
00158     
00159     cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunDigiPileUp TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent FEVTDEBUGHLT --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
00160     (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP for all TTbar tests defined, i.e. 5 TimeSize, 2 IgProf, 1 Callgrind, 5 Memcheck. The file /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root will be copied locally as INPUT_PILEUP_EVENTS.root and it will be used as the input file for the MixingModule pile up events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
00161     OR
00162     cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root
00163     (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
00164     OR
00165     cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
00166     (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once. It will also add the options "--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All" to all cmsDriver.py commands executed by the suite. In addition it will run only 2 cmsDriver.py "steps": "GEN,SIM" and "DIGI". Note the syntax GEN-SIM for combined cmsDriver.py steps)
00167     
00168     Legal entries for individual candles (--RunTimeSize, --RunIgProf, --RunCallgrind, --RunMemcheck options):
00169     %s
00170     ''' % ("\n".join(Candles)))
00171     
00172         parser.set_defaults(TimeSizeEvents   = 0        ,
00173                             IgProfEvents     = 0          ,
00174                             CallgrindEvents  = 0          ,
00175                             MemcheckEvents   = 0          ,
00176                             cmsScimark       = 10         ,
00177                             cmsScimarkLarge  = 10         ,  
00178                             cmsdriverOptions = "--eventcontent FEVTDEBUGHLT", # Decided to avoid using the automatic parsing of cmsDriver_highstats_hlt.txt: cmsRelValCmd.get_cmsDriverOptions(), #Get these options automatically now!
00179                             #"Release Integrators" will create another file relative to the performance suite and the operators will fetch from that file the --cmsdriver option... for now just set the eventcontent since that is needed in order for things to run at all now...
00180                             stepOptions      = ""         ,
00181                             profilers        = ""         ,
00182                             outputdir        = ""         ,
00183                             logfile          = os.path.join(os.getcwd(),"cmsPerfSuite.log"),
00184                             runonspare       = True       ,
00185                             bypasshlt        = False      ,
00186                             quicktest        = False      ,
00187                             unittest         = False      ,
00188                             noexec           = False      ,
00189                             dryrun           = False      ,
00190                             verbose          = True       ,
00191                             create           = False      ,
00192                             previousrel      = ""         ,
00193                             castordir        = self._CASTOR_DIR,
00194                             cores            = cmsCpuInfo.get_NumOfCores(), #Get Number of cpu cores on the machine from /proc/cpuinfo
00195                             cpu              = "1"        , #Cpu core on which the suite is run:
00196                             RunTimeSize      = ""         ,
00197                             RunIgProf        = ""         ,
00198                             RunCallgrind     = ""         ,
00199                             RunMemcheck      = ""         ,
00200                             RunDigiPileUP    = ""         ,
00201                             RunTimeSizePU    = ""         ,
00202                             RunIgProfPU      = ""         ,
00203                             RunCallgrindPU   = ""         ,
00204                             RunMemcheckPU    = ""         ,
00205                             PUInputFile      = ""         ,
00206                             userInputFile    = ""         )
00207         parser.add_option('--createIgVol', action="store_true", dest='create',
00208             help = 'Create IgProf AFS volume for the release and architecture')
00209         parser.add_option('-q', '--quiet'      , action="store_false", dest='verbose'   ,
00210             help = 'Output less information'                  )
00211         parser.add_option('-b', '--bypass-hlt' , action="store_true" , dest='bypasshlt' ,
00212             help = 'Bypass HLT root file as input to RAW2DIGI')
00213         parser.add_option('-n', '--notrunspare', action="store_false", dest='runonspare',
00214             help = 'Do not run cmsScimark on spare cores')        
00215         parser.add_option('-t', '--timesize'  , type='int'   , dest='TimeSizeEvents'  , metavar='<#EVENTS>'   ,
00216             help = 'specify the number of events for the TimeSize tests'                   )
00217         parser.add_option('-i', '--igprof'    , type='int'   , dest='IgProfEvents'    , metavar='<#EVENTS>'   ,
00218             help = 'specify the number of events for the IgProf tests'                     )
00219         parser.add_option('-c', '--callgrind'  , type='int'   , dest='CallgrindEvents'  , metavar='<#EVENTS>'   ,
00220             help = 'specify the number of events for the Callgrind tests'                   )
00221         parser.add_option('-m', '--memcheck'  , type='int'   , dest='MemcheckEvents'  , metavar='<#EVENTS>'   ,
00222             help = 'specify the number of events for the Memcheck tests'                   )
00223         parser.add_option('--cmsScimark'      , type='int'   , dest='cmsScimark'      , metavar=''            ,
00224             help = 'specify the number of times the cmsScimark benchmark is run before and after the performance suite on cpu1')
00225         parser.add_option('--cmsScimarkLarge' , type='int'   , dest='cmsScimarkLarge' , metavar=''            ,
00226             help = 'specify the number of times the cmsScimarkLarge benchmark is run before and after the performance suite on cpu1')
00227         parser.add_option('--cores'           , type='int', dest='cores'              , metavar='<CORES>'     ,
00228             help = 'specify the number of cores of the machine (can be used with 0 to stop cmsScimark from running on the other cores)')        
00229         parser.add_option('--cmsdriver' , type='string', dest='cmsdriverOptions', metavar='<OPTION_STR>',
00230             help = 'specify special options to use with the cmsDriver.py commands (designed for integration build use')        
00231         parser.add_option('-a', '--archive'   , type='string', dest='castordir'       , metavar='<DIR>'       ,
00232             help = 'specify the wanted CASTOR directory where to store the results tarball')
00233         parser.add_option('-L', '--logfile'   , type='string', dest='logfile'         , metavar='<FILE>'      ,
00234             help = 'file to store log output of the script')                
00235         parser.add_option('-o', '--output'    , type='string', dest='outputdir'       , metavar='<DIR>'       ,
00236             help = 'specify the directory where to store the output of the script')        
00237         parser.add_option('-r', '--prevrel'   , type='string', dest='previousrel'     , metavar='<DIR>'       ,
00238             help = 'Top level dir of previous release for regression analysis')        
00239         parser.add_option('--step'            , type='string', dest='stepOptions'     , metavar='<STEPS>'     ,
00240             help = 'specify the processing steps intended (instead of the default ones)' )
00241         parser.add_option('--cpu'             , type='string', dest='cpu'             , metavar='<CPU>'       ,
00242             help = 'specify the core on which to run the performance suite')
00243 
00244         #Adding new options to put everything configurable at command line:
00245         parser.add_option('--RunTimeSize'             , type='string', dest='RunTimeSize' , metavar='<CANDLES>'       ,
00246             help = 'specify on which candles to run the TimeSize tests')
00247         parser.add_option('--RunIgProf'             , type='string', dest='RunIgProf' , metavar='<CANDLES>'       ,
00248             help = 'specify on which candles to run the IgProf tests')
00249         parser.add_option('--RunCallgrind'             , type='string', dest='RunCallgrind' , metavar='<CANDLES>'       ,
00250             help = 'specify on which candles to run the Callgrind tests')
00251         parser.add_option('--RunMemcheck'             , type='string', dest='RunMemcheck' , metavar='<CANDLES>'       ,
00252             help = 'specify on which candles to run the Memcheck tests')
00253         parser.add_option('--RunDigiPileUp'             , type='string', dest='RunDigiPileUp' , metavar='<CANDLES>'       ,
00254             help = 'specify the candle on which to run DIGI PILE UP and repeat all the tests set to run on that candle with PILE UP')
00255         parser.add_option('--PUInputFile'             , type='string', dest='PUInputFile' , metavar='<FILE>'       ,
00256             help = 'specify the root file to pick the pile-up events from')
00257         parser.add_option('--RunTimeSizePU'             , type='string', dest='RunTimeSizePU' , metavar='<CANDLES>'       ,
00258             help = 'specify on which candles to run the TimeSize tests with PILE UP')
00259         parser.add_option('--RunIgProfPU'             , type='string', dest='RunIgProfPU' , metavar='<CANDLES>'       ,
00260             help = 'specify on which candles to run the IgProf tests with PILE UP')
00261         parser.add_option('--RunCallgrindPU'             , type='string', dest='RunCallgrindPU' , metavar='<CANDLES>'       ,
00262             help = 'specify on which candles to run the Callgrind tests with PILE UP')
00263         parser.add_option('--RunMemcheckPU'             , type='string', dest='RunMemcheckPU' , metavar='<CANDLES>'       ,
00264             help = 'specify on which candles to run the Memcheck tests with PILE UP')
00265 
00266         #Adding a filein option to use pre-processed RAW file for RECO and HLT:
00267         parser.add_option('--filein'             , type='string', dest='userInputFile' , metavar='<FILE>', #default="",
00268             help = 'specify input RAW root file for HLT and RAW2DIGI-RECO (list the files in the same order as the candles for the tests)')
00269 
00270         #Adding an option to handle additional (to the default user) email addresses to the email notification list (that sends the cmsPerfSuite.log once the performance suite is done running):
00271         parser.add_option('--mail', type='string', dest='MailLogRecipients', metavar='<EMAIL ADDRESS>', default=self.user, help='specify valid email address(es) name@domain in order to receive notification at the end of the performance suite running with the cmsPerfSuite.log file')
00272 
00273         #Adding option to turn off tarball creation at the end of the execution of the performance suite:
00274         parser.add_option('--no_tarball', action="store_false", dest='tarball', default=True, help='Turn off automatic tarball creation at the end of the performance suite execution')
00275                 
00276         #####################
00277         #    
00278         # Developer options
00279         #
00280     
00281         devel  = opt.OptionGroup(parser, "Developer Options",
00282                                          "Caution: use these options at your own risk."
00283                                          "It is believed that some of them bite.\n")
00284     
00285         devel.add_option('-p', '--profile'  , type="str" , dest='profilers', metavar="<PROFILERS>" ,
00286             help = 'Profile codes to use for cmsRelvalInput' )
00287         devel.add_option('-f', '--false-run', action="store_true", dest='dryrun'   ,
00288             help = 'Dry run'                                                                                           )            
00289         devel.add_option('-d', '--debug'    , action='store_true', dest='debug'    ,
00290             help = 'Debug'                                                                                             )
00291         devel.add_option('--quicktest'      , action="store_true", dest='quicktest',
00292             help = 'Quick overwrite all the defaults to small numbers so that we can run a quick test of our chosing.' )  
00293         devel.add_option('--test'           , action="store_true", dest='unittest' ,
00294             help = 'Perform a simple test, overrides other options. Overrides verbosity and sets it to false.'         )            
00295         devel.add_option('--no_exec'           , action="store_true", dest='noexec' ,
00296             help = 'Run the suite without executing the cmsRelvalreport.py commands in the various directories. This is a useful debugging tool.'         )
00297         parser.add_option_group(devel)
00298         (options, args) = parser.parse_args(argslist)
00299     
00300     
00301         self._debug           = options.debug
00302         self._unittest        = options.unittest
00303         self._noexec          = options.noexec
00304         self._verbose         = options.verbose
00305         self._dryrun          = options.dryrun
00306         create           = options.create
00307         castordir        = options.castordir
00308         TimeSizeEvents   = options.TimeSizeEvents
00309         IgProfEvents     = options.IgProfEvents
00310         CallgrindEvents  = options.CallgrindEvents
00311         MemcheckEvents   = options.MemcheckEvents
00312         cmsScimark       = options.cmsScimark
00313         cmsScimarkLarge  = options.cmsScimarkLarge
00314         cmsdriverOptions = options.cmsdriverOptions
00315         stepOptions      = options.stepOptions
00316         quicktest        = options.quicktest
00317         #candleoption     = options.candleOptions
00318         runonspare       = options.runonspare
00319         profilers        = options.profilers.strip()
00320         cpu              = options.cpu.strip()
00321         bypasshlt        = options.bypasshlt
00322         cores            = options.cores
00323         logfile          = options.logfile
00324         prevrel          = options.previousrel
00325         outputdir        = options.outputdir
00326         RunTimeSize      = options.RunTimeSize
00327         RunIgProf        = options.RunIgProf
00328         RunCallgrind     = options.RunCallgrind
00329         RunMemcheck      = options.RunMemcheck
00330         RunDigiPileUp    = options.RunDigiPileUp
00331         RunTimeSizePU    = options.RunTimeSizePU
00332         RunIgProfPU      = options.RunIgProfPU
00333         RunCallgrindPU   = options.RunCallgrindPU
00334         RunMemcheckPU    = options.RunMemcheckPU
00335         PUInputFile      = options.PUInputFile
00336         userInputFile    = options.userInputFile
00337         if options.MailLogRecipients !="" and self.user not in options.MailLogRecipients: #To allow for the --mail "" case of suppressing the email and the default user case
00338            MailLogRecipients= self.user+","+options.MailLogRecipients #Add the user by default if there is a mail report
00339         else:
00340            MailLogRecipients=options.MailLogRecipients
00341         tarball          = options.tarball
00342     
00343         #################
00344         # Check logfile option
00345         #
00346         if not logfile == None:
00347             logfile = os.path.abspath(logfile)
00348             logdir = os.path.dirname(logfile)
00349             if not os.path.exists(logdir):
00350                 parser.error("Directory to output logfile does not exist")
00351                 sys.exit()
00352             logfile = os.path.abspath(logfile)
00353     
00354         #############
00355         # Check step Options
00356         #
00357         if "GEN,SIM" in stepOptions:
00358             self.logh.write("WARNING: Please use GEN-SIM with a hypen not a \",\"!\n")
00359         #Using the step option as a switch between different dictionaries for:
00360         #RunTimeSize,RunIgProf,RunCallgrind,RunMemCheck,RunDigiPileUp:
00361         if stepOptions == "" or stepOptions == 'Default':
00362             pass
00363         else:
00364             stepOptions='--usersteps=%s' % (stepOptions)        
00365     
00366         ###############
00367         # Check profile option
00368         #
00369         isnumreg = re.compile("^-?[0-9]*$")
00370         found    = isnumreg.search(profilers)
00371         if not found :
00372             parser.error("profile codes option contains non-numbers")
00373             sys.exit()
00374     
00375         ###############
00376         # Check output directory option
00377         #
00378         if outputdir == "":
00379             outputdir = os.getcwd()
00380         else:
00381             outputdir = os.path.abspath(outputdir)
00382     
00383         if not os.path.isdir(outputdir):
00384             parser.error("%s is not a valid output directory" % outputdir)
00385             sys.exit()
00386             
00387         ################
00388         # Check cpu option
00389         # 
00390         numetcomreg = re.compile("^[0-9,]*")
00391         if not numetcomreg.search(cpu):
00392             parser.error("cpu option needs to be a comma separted list of ints or a single int")
00393             sys.exit()
00394     
00395         cpustr = cpu
00396         cpu = []
00397         if "," in cpustr:
00398             cpu = map(lambda x: int(x),cpustr.split(","))
00399         else:
00400             cpu = [ int(cpustr)  ]
00401     
00402         ################
00403         # Check previous release directory
00404         #
00405         if not prevrel == "":
00406             prevrel = os.path.abspath(prevrel)
00407             if not os.path.exists(prevrel):
00408                 self.logh.write("ERROR: Previous release dir %s could not be found" % prevrel)
00409                 sys.exit()
00410     
00411         #############
00412         # Setup quicktest option
00413         #
00414         if quicktest:
00415             TimeSizeEvents = 1
00416             IgProfEvents = 1
00417             CallgrindEvents = 0
00418             MemcheckEvents = 0
00419             cmsScimark = 1
00420             cmsScimarkLarge = 1
00421     
00422         #############
00423         # Setup unit test option
00424         #
00425         if self._unittest:
00426             self._verbose = False
00427             if stepOptions == "":
00428                 stepOptions = "GEN-SIM,DIGI,L1,DIGI2RAW,HLT,RAW2DIGI-RECO"
00429             cmsScimark      = 0
00430             cmsScimarkLarge = 0
00431             CallgrindEvents  = 0
00432             MemcheckEvents  = 0
00433             IgProfEvents    = 0
00434             TimeSizeEvents  = 1
00435         
00436         #Split all the RunTimeSize etc candles in lists:
00437         TimeSizeCandles=[]
00438         IgProfCandles=[]
00439         CallgrindCandles=[]
00440         MemcheckCandles=[]
00441         TimeSizePUCandles=[]
00442         IgProfPUCandles=[]
00443         CallgrindPUCandles=[]
00444         MemcheckPUCandles=[]
00445         userInputRootFiles=[]
00446         if RunTimeSize:
00447             TimeSizeCandles = RunTimeSize.split(",")
00448         if RunIgProf:
00449             IgProfCandles = RunIgProf.split(",")
00450         if RunCallgrind:
00451             CallgrindCandles = RunCallgrind.split(",")
00452         if RunMemcheck:
00453             MemcheckCandles = RunMemcheck.split(",")
00454         if RunDigiPileUp:
00455             for candle in RunDigiPileUp.split(","):
00456                 if candle in TimeSizeCandles:
00457                     TimeSizePUCandles.append(candle)
00458                 if candle in IgProfCandles:
00459                     IgProfPUCandles.append(candle)
00460                 if candle in CallgrindCandles:
00461                     CallgrindPUCandles.append(candle)
00462                 if candle in MemcheckCandles:
00463                     MemcheckPUCandles.append(candle)
00464         if RunTimeSizePU:
00465             TimeSizePUCandles.extend(RunTimeSizePU.split(","))
00466             #Some smart removal of duplicates from the list!
00467             temp=set(TimeSizePUCandles)
00468             TimeSizePUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
00469         if RunIgProfPU:
00470             IgProfPUCandles.extend(RunIgProfPU.split(","))
00471             #Some smart removal of duplicates from the list!
00472             temp=set(IgProfPUCandles)
00473             IgProfPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
00474         if RunCallgrindPU:
00475             CallgrindPUCandles.extend(RunCallgrindPU.split(","))
00476             #Some smart removal of duplicates from the list!
00477             temp=set(CallgrindPUCandles)
00478             CallgrindPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
00479         if RunMemcheckPU:
00480             MemcheckPUCandles.extend(RunMemcheckPU.split(","))
00481             #Some smart removal of duplicates from the list!
00482             temp=set(MemcheckPUCandles)
00483             MemcheckPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
00484         if userInputFile:
00485            userInputRootFiles=userInputFile.split(",")
00486 
00487            
00488 
00489         #############
00490         # Setup cmsdriver and eventual cmsdriverPUoption
00491         #
00492         cmsdriverPUOptions=""
00493         if cmsdriverOptions:
00494             #Set the eventual Pile Up cmsdriver options first:
00495             if TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles:
00496                 #Bug fixed: no space between --pileup= and LowLumiPileUp (otherwise could omit the =)
00497                 cmsdriverPUOptions = '--cmsdriver="%s %s%s"'%(cmsdriverOptions," --pileup=",cmsDriverPileUpOption)
00498             #Set the regular ones too:
00499             cmsdriverOptions = '--cmsdriver="%s"'%cmsdriverOptions        
00500     
00501         return (create          ,
00502                 castordir       ,
00503                 TimeSizeEvents  ,
00504                 IgProfEvents    ,
00505                 CallgrindEvents ,
00506                 MemcheckEvents  ,
00507                 cmsScimark      ,
00508                 cmsScimarkLarge ,
00509                 cmsdriverOptions,
00510                 cmsdriverPUOptions,
00511                 stepOptions     ,
00512                 quicktest       ,
00513                 profilers       ,
00514                 cpu             ,
00515                 cores           ,
00516                 prevrel         ,
00517                 bypasshlt       ,
00518                 runonspare      ,
00519                 outputdir       ,
00520                 logfile         ,
00521                 TimeSizeCandles ,
00522                 IgProfCandles   ,
00523                 CallgrindCandles,
00524                 MemcheckCandles ,
00525                 TimeSizePUCandles ,
00526                 IgProfPUCandles   ,
00527                 CallgrindPUCandles,
00528                 MemcheckPUCandles ,
00529                 PUInputFile     ,
00530                 userInputRootFiles,
00531                 MailLogRecipients,
00532                 tarball)
00533     
00534     #def usage(self):
00535     #    return __doc__
00536     
00537     ############
00538     # Run a list of commands using system
00539     # ! We should rewrite this not to use system (most cases it is unnecessary)
00540     def runCmdSet(self,cmd):
00541         exitstat = 0
00542         if len(cmd) <= 1:
00543             exitstat = self.runcmd(cmd)
00544             if self._verbose:
00545                 self.printFlush(cmd)
00546         else:
00547             for subcmd in cmd:
00548                 if self._verbose:
00549                     self.printFlush(subcmd)
00550             exitstat = self.runcmd(" && ".join(cmd))
00551         if self._verbose:
00552             self.printFlush(self.getDate())
00553         return exitstat
00554     
00555     #############
00556     # Print and flush a string (for output to a log file)
00557     #
00558     def printFlush(self,command):
00559         if self._verbose:
00560             self.logh.write(str(command) + "\n")
00561             self.logh.flush()
00562     
00563     #############
00564     # Run a command and return the exit status
00565     #
00566     def runcmd(self,command):
00567         #Substitute popen with subprocess.Popen!
00568         #Using try/except until Popen becomes thread safe (it seems that everytime it is called
00569         #all processes are checked to reap the ones that are done, this creates a race condition with the wait()... that
00570         #results into an error with "No child process".
00571         #os.popen(command)
00572         try:
00573             process  = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
00574             pid=process.pid
00575             exitstat= process.wait()
00576             cmdout   = process.stdout.read()
00577             exitstat = process.returncode
00578         except OSError, detail:
00579             self.logh.write("Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s).Assume it failed!\n %s\n"%(command,pid,detail))
00580             self.logh.flush()
00581             exitstat=999
00582             cmdout="Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s).Assume it failed!\n %s"%(command,pid,detail)
00583         if self._verbose:
00584             self.logh.write(cmdout)# + "\n") No need of extra \n!
00585             self.logh.flush()
00586         if exitstat == None:
00587             self.logh.write("Something strange is going on! Exit code was None for command %s: check if it really ran!"%command)
00588             self.logh.flush()
00589             exitstat=0
00590         return exitstat
00591     
00592     def getDate(self):
00593         return time.ctime()
00594     
00595     def printDate(self):
00596         self.logh.write(self.getDate() + "\n")
00597         self.logh.flush()
00598     #############
00599     # Make directory for a particular candle and profiler.
00600     # ! This is really unnecessary code and should be replaced with a os.mkdir() call
00601     def mkCandleDir(self,pfdir,candle,profiler):
00602         adir = os.path.join(pfdir,"%s_%s" % (candle,profiler))
00603         self.runcmd( "mkdir -p %s" % adir )
00604         if self._verbose:
00605             self.printDate()
00606         return adir
00607     
00608     #############
00609     # Copy root file from another candle's directory
00610     # ! Again this is messy. 
00611 
00612     def cprootfile(self,dir,candle,NumOfEvents,cmsdriverOptions=""):
00613         cmds = ("cd %s" % dir,
00614                 "cp -pR ../%s_IgProf/%s_GEN,SIM.root ."  % (candle,CandFname[candle]))
00615         
00616         if self.runCmdSet(cmds):
00617             self.logh.write("Since there was no ../%s_IgProf/%s_GEN,SIM.root file it will be generated first\n"%(candle,CandFname[candle]))
00618 
00619             cmd = "cd %s ; cmsDriver.py %s -s GEN,SIM -n %s --fileout %s_GEN,SIM.root %s>& %s_GEN_SIM_for_valgrind.log" % (dir,KeywordToCfi[candle],str(NumOfEvents),candle,cmsdriverOptions,candle)
00620 
00621             self.printFlush(cmd)
00622             #Obsolete popen4-> subprocess.Popen
00623             #cmdout=os.popen3(cmd)[2].read()
00624             cmdout=subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
00625             if cmdout:
00626                 self.printFlush(cmdout)
00627             return cmdout
00628             
00629     #############
00630     # Display G4 cerr errors and CMSExceptions in the logfile
00631     #
00632     def displayErrors(self,file):
00633         try:
00634             for line in open(file,"r"):
00635                 if "cerr" in line or "CMSException" in line:
00636                     self.logh.write("ERROR: %s\n" % line)
00637                     self.ERRORS += 1
00638         except OSError, detail:
00639             self.logh.write("WARNING: %s\n" % detail)
00640             self.ERRORS += 1        
00641         except IOError, detail:
00642             self.logh.write("WARNING: %s\n" % detail)
00643             self.ERRORS += 1
00644         
00645     ##############
00646     # Filter lines in the valgrind report that match GEN,SIM
00647     #
00648     def valFilterReport(self,dir):
00649         #cmds = ("cd %s" % dir,
00650         #        "grep -v \"step=GEN,SIM\" SimulationCandles_%s.txt > tmp" % (self.cmssw_version),
00651         #        "mv tmp SimulationCandles_%s.txt"                         % (self.cmssw_version))
00652         #FIXME:
00653         #Quick and dirty hack to have valgrind MemCheck run on 5 events on both GEN,SIM and DIGI in QCD_80_120, while removing the line for GEN,SIM for Callgrind
00654         InputFileName=os.path.join(dir,"SimulationCandles_%s.txt"%(self.cmssw_version))
00655         InputFile=open(InputFileName,"r")
00656         InputLines=InputFile.readlines()
00657         InputFile.close()
00658         Outputfile=open(InputFileName,"w")
00659         simRegxp=re.compile("step=GEN,SIM")
00660         digiRegxp=re.compile("step=DIGI")
00661         CallgrindRegxp=re.compile("ValgrindFCE")
00662         MemcheckRegxp=re.compile("Memcheck")
00663         NumEvtRegxp=re.compile("-n 1")#FIXME Either use the ValgrindEventNumber or do a more general match!
00664         for line in InputLines:
00665             if simRegxp.search(line) and CallgrindRegxp.search(line):
00666                 continue
00667             elif simRegxp.search(line) and MemcheckRegxp.search(line):
00668                 #Modify
00669                 if NumEvtRegxp.search(line):
00670                     line=NumEvtRegxp.sub(r"-n 5",line)
00671                 else:
00672                     self.logh.write("The number of Memcheck event was not changed since the original number of Callgrind event was not 1!\n")
00673                 Outputfile.write(line)
00674             elif digiRegxp.search(line) and MemcheckRegxp.search(line):
00675                 #Modify
00676                 if NumEvtRegxp.search(line):
00677                     line=NumEvtRegxp.sub(r"-n 5",line)
00678                 else:
00679                     self.logh.write("The number of Memcheck event was not changed since the original number of Callgrind event was not 1!\n")
00680                 Outputfile.write(line)
00681             else:
00682                 Outputfile.write(line)
00683         self.logh.flush()
00684         Outputfile.close()
00685             
00686         #self.runCmdSet(cmds)
00687     
00688     ##################
00689     # Run cmsScimark benchmarks a number of times
00690     #
00691     def benchmarks(self,cpu,pfdir,name,bencher,large=False):
00692         cmd = self.Commands[cpu][3]
00693         redirect = ""
00694         if large:
00695             redirect = " -large >>"    
00696         else:
00697             redirect = " >>"
00698     
00699         for i in range(bencher):
00700            #Check first for the existence of the file so that we can append:
00701            if not os.path.exists(os.path.join(pfdir,os.path.basename(name))):
00702               #Equivalent of touch to make sure the file exist to be able to append to it.
00703               open(os.path.join(pfdir,os.path.basename(name)))
00704               
00705            command= cmd + redirect + os.path.join(pfdir,os.path.basename(name))        
00706            self.printFlush(command + " [%s/%s]" % (i+1,bencher))
00707            self.runcmd(command)
00708            self.logh.flush()
00709     
00710     ##################
00711     # This function is a wrapper around cmsRelvalreport
00712     # 
00713     def runCmsReport(self,cpu,dir,candle):
00714         cmd  = self.Commands[cpu][1]
00715         cmds = ("cd %s"                 % (dir),
00716                 "%s -i SimulationCandles_%s.txt -t perfreport_tmp -R -P >& %s.log" % (cmd,self.cmssw_version,candle))
00717         exitstat = 0
00718         if not self._debug:
00719             exitstat = self.runCmdSet(cmds)
00720             
00721         if self._unittest and (not exitstat == 0):
00722             self.logh.write("ERROR: CMS Report returned a non-zero exit status \n")
00723             sys.exit(exitstat)
00724         else:
00725             return(exitstat) #To return the exit code of the cmsRelvalreport.py commands to the runPerfSuite function
00726     
00727     ##################
00728     # Test cmsDriver.py (parses the simcandles file, removing duplicate lines, and runs the cmsDriver part)
00729     #
00730     def testCmsDriver(self,cpu,dir,cmsver,candle):
00731         cmsdrvreg = re.compile("^cmsDriver.py")
00732         cmd  = self.Commands[cpu][0]
00733         noExit = True
00734         stepreg = re.compile("--step=([^ ]*)")
00735         previousCmdOnline = ""
00736         for line in open(os.path.join(dir,"SimulationCandles_%s.txt" % (cmsver))):
00737             if (not line.lstrip().startswith("#")) and not (line.isspace() or len(line) == 0): 
00738                 cmdonline  = line.split("@@@",1)[0]
00739                 if cmsdrvreg.search(cmdonline) and not previousCmdOnline == cmdonline:
00740                     stepbeingrun = "Unknown"
00741                     matches = stepreg.search(cmdonline)
00742                     if not matches == None:
00743                         stepbeingrun = matches.groups()[0]
00744                     if "PILEUP" in cmdonline:
00745                         stepbeingrun += "_PILEUP"
00746                     self.logh.write(cmdonline + "\n")
00747                     cmds = ("cd %s"      % (dir),
00748                             "%s  >& ../cmsdriver_unit_test_%s_%s.log"    % (cmdonline,candle,stepbeingrun))
00749                     if self._dryrun:
00750                         self.logh.write(cmds + "\n")
00751                     else:
00752                         out = self.runCmdSet(cmds)                    
00753                         if not out == None:
00754                             sig     = out >> 16    # Get the top 16 bits
00755                             xstatus = out & 0xffff # Mask out all bits except the first 16 
00756                             self.logh.write("FATAL ERROR: CMS Driver returned a non-zero exit status (which is %s) when running %s for candle %s. Signal interrupt was %s\n" % (xstatus,stepbeingrun,candle,sig))
00757                             sys.exit()
00758                 previousCmdOnline = cmdonline
00759         
00760     ##############
00761     # Wrapper for cmsRelvalreportInput 
00762     # 
00763     def runCmsInput(self,cpu,dir,numevents,candle,cmsdrvopts,stepopt,profiles,bypasshlt,userInputFile):
00764 
00765         #Crappy fix for optional options with special synthax (bypasshlt and userInputFile)
00766         bypass = ""
00767         if bypasshlt:
00768             bypass = "--bypass-hlt"
00769         userInputFileOption=""
00770         if userInputFile:
00771            userInputFileOption = "--filein %s"%userInputFile
00772         cmd = self.Commands[cpu][2]
00773         cmds=[]
00774         #print cmds
00775         cmds = ("cd %s"                    % (dir),
00776                 "%s %s \"%s\" %s %s %s %s %s" % (cmd,
00777                                               numevents,
00778                                               candle,
00779                                               profiles,
00780                                               cmsdrvopts,
00781                                               stepopt,
00782                                               bypass,userInputFileOption))
00783         exitstat=0
00784         exitstat = self.runCmdSet(cmds)
00785         if self._unittest and (not exitstat == 0):
00786             self.logh.write("ERROR: CMS Report Input returned a non-zero exit status \n" )
00787         return exitstat
00788     ##############
00789     # Prepares the profiling directory and runs all the selected profiles (if this is not a unit test)
00790     #
00791     #Making parameters named to facilitate the handling of arguments (especially with the threading use case)
00792     def simpleGenReport(self,cpus,perfdir=os.getcwd(),NumEvents=1,candles=['MinBias'],cmsdriverOptions='',stepOptions='',Name='',profilers='',bypasshlt='',userInputRootFiles=''):
00793         callgrind = Name == "Callgrind"
00794         memcheck  = Name == "Memcheck"
00795     
00796         profCodes = {"TimeSize" : "0123",
00797                      "IgProf"   : "4567",
00798                      "IgProf_Perf":"47", #Added the Analyse to IgProf_Perf #FIXME: At the moment Analyse is always run whether 7 is selected or not! Issue to solve in cmsRelvalreportInput.py... but not really important (it's always been there, not impacting our use-cases).
00799                      "IgProf_Mem":"567",
00800                      "Callgrind": "8",
00801                      "Memcheck" : "9",
00802                      None       : "-1"} 
00803     
00804         profiles = profCodes[Name]
00805         if not profilers == "":
00806             profiles = profilers        
00807     
00808         RelvalreportExitCode=0
00809         
00810         for cpu in cpus:
00811             pfdir = perfdir
00812             if len(cpus) > 1:
00813                 pfdir = os.path.join(perfdir,"cpu_%s" % cpu)
00814             for candle in candles:
00815                 #Create the directory for cmsRelvalreport.py running (e.g. MinBias_TimeSize, etc)
00816                 #Catch the case of PILE UP:
00817                 if "--pileup" in cmsdriverOptions:
00818                    candlename=candle+"_PU"
00819                 else:
00820                    candlename=candle
00821                 adir=self.mkCandleDir(pfdir,candlename,Name)
00822                 if self._unittest:
00823                     # Run cmsDriver.py
00824                     if userInputRootFiles:
00825                        self.logh.write(userInputRootFiles)
00826                        userInputFile=userInputRootFiles[0]
00827                     else:
00828                        userInputFile=""
00829                     self.logh.flush()
00830                     self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile) 
00831                     self.testCmsDriver(cpu,adir,candle)
00832                 else:
00833                     if userInputRootFiles:
00834                        self.logh.write("Variable userInputRootFiles is %s\n"%userInputRootFiles)
00835                        #Need to use regexp, cannot rely on the order... since for different tests there are different candles...
00836                        #userInputFile=userInputRootFiles[candles.index(candle)]
00837                        #FIXME:
00838                        #Note the issue that the input files HAVE to have in their name the candle as is used in cmsPerfSuite.py command line!
00839                        #This is currently caught by a printout in the log: should be either taken care of with some exception to throw?
00840                        #Will put this in the documentation
00841                        userInputFile=""
00842                        candleregexp=re.compile(candle)
00843                        for file in userInputRootFiles:
00844                           if candleregexp.search(file):
00845                              userInputFile=file
00846                              self.logh.write("For these %s %s tests will use user input file %s\n"%(candlename,Name,userInputFile))
00847                        if userInputFile == "":
00848                           self.logh.write("***WARNING: For these %s %s tests could not find a matching input file in %s: will try to do without it!!!!!\n"%(candlename,Name,userInputRootFiles))
00849                        self.logh.flush()
00850                     else:
00851                        userInputFile=""
00852                     DummyTestName=candlename+"_"+stepOptions.split("=")[1]
00853                     DummyTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the timer (DummyTimer is just a reference, but we will use the dictionary to access this later...
00854                     TimerInfo[Name].update({DummyTestName:DummyTimer}) #Add the TimeSize timer to the dictionary
00855                     #The following command will create the appropriate SimulationCandlesX.txt file in the relevant directory, ready to run cmsRelvalreport.py on it.
00856                     self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile)            
00857                     #Here where the no_exec option kicks in (do everything but do not launch cmsRelvalreport.py, it also prevents cmsScimark spawning...):
00858                     if self._noexec:
00859                         self.logh.write("Running in debugging mode, without executing cmsRelvalreport.py\n")
00860                         self.logh.flush()
00861                         pass
00862                     else:
00863                         #The following command will launch cmsRelvalreport.py on the SimulationCandlesX.txt input file created above.
00864                         ExitCode=self.runCmsReport(cpu,adir,candle)
00865                         self.logh.write("Individual cmsRelvalreport.py ExitCode %s\n"%ExitCode)
00866                         RelvalreportExitCode=RelvalreportExitCode+ExitCode
00867                         self.logh.write("Summed cmsRelvalreport.py ExitCode %s\n"%RelvalreportExitCode)
00868                         self.logh.flush()
00869                     DummyTimer.set_end(datetime.datetime.now())
00870                     
00871                     #for proflog in proflogs:
00872                     #With the change from 2>1&|tee to >& to preserve exit codes, we need now to check all logs...
00873                     #less nice... we might want to do this externally so that in post-processing its a re-usable tool
00874                     globpath = os.path.join(adir,"*.log") #"%s.log"%candle)
00875                     self.logh.write("Looking for logs that match %s\n" % globpath)
00876                     logs     = glob.glob(globpath)
00877                     for log in logs:
00878                         self.logh.write("Found log %s\n" % log)
00879                         self.displayErrors(log)
00880         self.printFlush("Returned cumulative RelvalreportExitCode is %s"%RelvalreportExitCode)
00881         return RelvalreportExitCode
00882     
00883     ############
00884     # Runs benchmarking, cpu spinlocks on spare cores and profiles selected candles
00885     #
00886     #FIXME:
00887     #Could redesign interface of functions to use keyword arguments:
00888     #def runPerfSuite(**opts):
00889     #then instead of using castordir variable, would use opts['castordir'] etc    
00890     def runPerfSuite(self,
00891                      create           = False,
00892                      #Swtiching from CASTOR to EOS (using xrdcp instead of rfcp and  root://eoscms//eos/ instead of /castor/cern.ch/
00893                      #Actually not yet... for consistency we will keep it on CASTOR for now
00894                      #FIXME! Do the migration, following its implication in PerfDB application!
00895                      castordir        = "/castor/cern.ch/cms/store/relval/performance/",
00896                      TimeSizeEvents   = 100        ,
00897                      IgProfEvents     = 5          ,
00898                      CallgrindEvents  = 1          ,
00899                      MemcheckEvents   = 5          ,
00900                      cmsScimark       = 10         ,
00901                      cmsScimarkLarge  = 10         ,
00902                      cmsdriverOptions = ""         ,#Could use directly cmsRelValCmd.get_Options()
00903                      cmsdriverPUOptions= ""        ,
00904                      stepOptions      = ""         ,
00905                      quicktest        = False      ,
00906                      profilers        = ""         ,
00907                      cpus             = [1]        ,
00908                      cpu_list         = [1]        ,
00909                      cores            = 4          ,#Could use directly cmsCpuInfo.get_NumOfCores()
00910                      prevrel          = ""         ,
00911                      bypasshlt        = False      ,
00912                      runonspare       = True       ,
00913                      perfsuitedir     = os.getcwd(),
00914                      logfile          = None,
00915                      TimeSizeCandles      = ""         ,
00916                      IgProfCandles        = ""         ,
00917                      CallgrindCandles     = ""         ,
00918                      MemcheckCandles      = ""         ,
00919                      TimeSizePUCandles    = ""         ,
00920                      IgProfPUCandles      = ""         ,
00921                      CallgrindPUCandles   = ""         ,
00922                      MemcheckPUCandles    = ""         ,
00923                      PUInputFile          = ""         ,
00924                      userInputFile        = ""         ,
00925                      MailLogRecipients    = ""         ,
00926                      tarball              = ""         ):
00927         
00928         #Set up a variable for the FinalExitCode to be used as the sum of exit codes:
00929         FinalExitCode=0
00930 
00931         #Set up the logfile first!
00932         if not logfile == None:
00933            try:
00934               self.logh = open(logfile,"a")
00935            except (OSError, IOError), detail:
00936               self.logh.write(detail + "\n")
00937               self.logh.flush()  
00938 
00939         #Adding HEPSPEC06 score if available in /build/HEPSPEC06.score file
00940         self.HEPSPEC06 = 0 #Set it to 0 by default (so it is easy to catch in the DB too)
00941         try:
00942            HEPSPEC06_file=open("/build/HEPSPEC06.score","r")
00943            for line in HEPSPEC06_file.readlines():
00944               if not line.startswith("#") and "HEPSPEC06" in line:
00945                  self.HEPSPEC06= line.split()[2]
00946         except IOError:
00947            self.logh.write("***Warning***: Could not find file /build/HEPSPEC06.score file on this machine!\n")
00948            self.logh.flush()
00949 
00950         #Adding a copy of /proc/cpuinfo and /proc/meminfo in the working directory so it can be kept in the tarball on CASTOR:
00951         localcpuinfo=os.path.join(perfsuitedir,"cpuinfo")
00952         cpuinfo_exitcode=-1
00953         if os.path.exists(localcpuinfo):
00954            cpuinfo_exitcode=0
00955         else:
00956            self.logh.write("Copying /proc/cpuinfo in current working directory (%s)\n"%perfsuitedir)
00957            cpuinfo_exitcode=self.runcmd("cp /proc/cpuinfo %s"%perfsuitedir)
00958         localmeminfo=os.path.join(perfsuitedir,"meminfo")
00959         meminfo_exitcode=-1
00960         if os.path.exists(localmeminfo):
00961            meminfo_exitcode=0
00962         else:
00963            self.logh.write("Copying /proc/meminfo in current working directory (%s)\n"%perfsuitedir)
00964            meminfo_exitcode=self.runcmd("cp /proc/meminfo %s"%perfsuitedir)
00965         if cpuinfo_exitcode or meminfo_exitcode:
00966            self.logh.write("There was an issue copying the cpuinfo or meminfo files!\n")
00967         self.logh.flush()
00968         
00969         try:        
00970             if not prevrel == "":
00971                 self.logh.write("Production of regression information has been requested with release directory %s\n" % prevrel)
00972             if not cmsdriverOptions == "":
00973                 self.logh.write("Running cmsDriver.py with user defined options: %s\n" % cmsdriverOptions)
00974                 #Attach the full option synthax for cmsRelvalreportInput.py:
00975                 cmsdriverOptionsRelvalInput="--cmsdriver="+cmsdriverOptions
00976                 #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
00977             if not stepOptions == "":
00978                 self.logh.write("Running user defined steps only: %s\n" % stepOptions)
00979                 #Attach the full option synthax for cmsRelvalreportInput.py:
00980                 setpOptionsRelvalInput="--usersteps="+stepOptions
00981                 #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
00982             if bypasshlt:
00983                 #Attach the full option synthax for cmsRelvalreportInput.py:
00984                 bypasshltRelvalInput="--bypass-hlt"
00985                 #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
00986             self.logh.write("Current Architecture is %s\n"%self.cmssw_arch)
00987             self.logh.write("Current CMSSW version is %s\n"%self.cmssw_version)
00988             self.logh.write("This machine ( %s ) is assumed to have %s cores, and the suite will be run on cpu %s\n" %(self.host,cores,cpus))
00989             self.logh.write("This machine's HEPSPEC06 score is: %s \n"%self.HEPSPEC06)
00990             path=os.path.abspath(".")
00991             self.logh.write("Performance Suite started running at %s on %s in directory %s, run by user %s\n" % (self.getDate(),self.host,path,self.user))
00992             #Start the timer for the total performance suite running time:
00993             TotalTime=PerfSuiteTimer(start=datetime.datetime.now())
00994             #Also initialize the dictionary that will contain all the timing information:
00995             global TimerInfo
00996             TimerInfo={'TotalTime':{'TotalTime':TotalTime}} #Structure will be {'key':[PerfSuiteTimerInstance,...],...}
00997             #Obsolete popen4-> subprocess.Popen
00998             #showtags=os.popen4("showtags -r")[1].read()
00999             showtags=subprocess.Popen("showtags -r",shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
01000             self.logh.write(showtags) # + "\n") No need for extra \n!
01001             self.logh.flush()
01002             #For the log:
01003             if self._verbose:
01004                 self.logh.write("The performance suite results tarball will be stored in CASTOR at %s\n" % self._CASTOR_DIR)
01005                 self.logh.write("%s TimeSize events\n" % TimeSizeEvents)
01006                 self.logh.write("%s IgProf events\n"   % IgProfEvents)
01007                 self.logh.write("%s Callgrind events\n" % CallgrindEvents)
01008                 self.logh.write("%s Memcheck events\n" % MemcheckEvents)
01009                 self.logh.write("%s cmsScimark benchmarks before starting the tests\n"      % cmsScimark)
01010                 self.logh.write("%s cmsScimarkLarge benchmarks before starting the tests\n" % cmsScimarkLarge)
01011                 self.logh.flush()
01012             #Actual script actions!
01013             #Will have to fix the issue with the matplotlib pie-charts:
01014             #Used to source /afs/cern.ch/user/d/dpiparo/w0/perfreport2.1installation/share/perfreport/init_matplotlib.sh
01015             #Need an alternative in the release
01016 
01017             #Code for the architecture benchmarking use-case
01018             if len(cpus) > 1:
01019                 for cpu in cpus:
01020                     cpupath = os.path.join(perfsuitedir,"cpu_%s" % cpu)
01021                     if not os.path.exists(cpupath):
01022                         os.mkdir(cpupath)
01023             
01024             self.Commands = {}
01025             AllScripts = self.Scripts + self.AuxiliaryScripts
01026     
01027             for cpu in range(cmsCpuInfo.get_NumOfCores()): #FIXME use the actual number of cores of the machine here!
01028                 self.Commands[cpu] = []
01029 
01030             #Information for the log:
01031             self.logh.write("Full path of all the scripts used in this run of the Performance Suite:\n")
01032             for script in AllScripts:
01033                 which="which " + script
01034     
01035                 #Logging the actual version of cmsDriver.py, cmsRelvalreport.py, cmsSimPyRelVal.pl
01036                 #Obsolete popen4-> subprocess.Popen
01037                 #whichstdout=os.popen4(which)[1].read()
01038                 whichstdout=subprocess.Popen(which,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
01039                 self.logh.write(whichstdout) # + "\n") No need of the extra \n!
01040                 if script in self.Scripts:
01041                     for cpu in range(cmsCpuInfo.get_NumOfCores()):#FIXME use the actual number of cores of the machine here!
01042                         command="taskset -c %s %s" % (cpu,script)
01043                         self.Commands[cpu].append(command)
01044                         
01045             #First submit the cmsScimark benchmarks on the unused cores:
01046             scimark = ""
01047             scimarklarge = ""
01048             if not (self._unittest or self._noexec):
01049                 if (len(cpu_list) != cores):
01050                     for core in range(cores):
01051                         if (not core in cpus) and runonspare:
01052                             self.logh.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core) + "\n")
01053                             subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (perfsuitedir, str(core))            
01054                             command="taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
01055                             self.logh.write(command + "\n")
01056                       
01057                             #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
01058                             #cpus so it makes no sense to try reading its stdout/err
01059                             #Obsolete popen4-> subprocess.Popen
01060                             #os.popen4(command)
01061                             subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
01062                       
01063             self.logh.flush()
01064     
01065             #Don't do benchmarking if in debug mode... saves time
01066             benching = not self._debug
01067             ##FIXME:
01068             #We may want to introduce a switch here or agree on a different default (currently 10 cmsScimark and 10 cmsScimarkLarge)
01069             if benching and not (self._unittest or self._noexec): 
01070                 #Submit the cmsScimark benchmarks on the cpu where the suite will be run:
01071                 for cpu in cpus:
01072                     scimark      = open(os.path.join(perfsuitedir,"cmsScimark2.log")      ,"w")        
01073                     scimarklarge = open(os.path.join(perfsuitedir,"cmsScimark2_large.log"),"w")
01074                     if cmsScimark > 0:
01075                         self.logh.write("Starting with %s cmsScimark on cpu%s\n"       % (cmsScimark,cpu))
01076                         cmsScimarkInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
01077                         TimerInfo.update({'cmsScimarkTime':{'cmsScimarkInitial':cmsScimarkInitialTime}}) #Add the cmsScimarkInitialTime information to the general TimerInfo dictionary
01078                         self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
01079                         cmsScimarkInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimark initial timer
01080     
01081                     if cmsScimarkLarge > 0:
01082                         self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
01083                         cmsScimarkLargeInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLarge PerfSuiteTimer
01084                         TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeInitial':cmsScimarkLargeInitialTime}) #Add the cmsScimarkLargeInitialTime information to the general TimerInfo dictionary
01085                         self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge, large=True)
01086                         cmsScimarkLargeInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Initial timer
01087                 self.logh.flush()
01088             #Handling the Pile up input file here:
01089             if (TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles) and not ("FASTSIM" in stepOptions):
01090                 #Note the FASTSIM exclusion... since there is no need to copy the file for FASTSIM.
01091                 PUInputName=os.path.join(perfsuitedir,"INPUT_PILEUP_EVENTS.root")
01092                 if PUInputFile:
01093                     #Define the actual command to copy the file locally:
01094                     #Allow the file to be mounted locally (or accessible via AFS)
01095                     copycmd="cp"
01096                     #Allow the file to be on CASTOR (taking a full CASTOR path)
01097                     if '/store/relval/' in PUInputFile:
01098                        #Switching from CASTOR to EOS, i.e. from rfcp to xrdcp
01099                         copycmd="xrdcp"
01100                         #Accept plain LFNs from DBS for RelVal CASTOR files:
01101                         #Minor fix to allow the case of user using the full path /castor/cern.ch/cms...
01102                         if PUInputFile.startswith('/store/relval/'):
01103                            #Switching to EOS from CASTOR:
01104                            #PUInputFile="/castor/cern.ch/cms"+PUInputFile
01105                            PUInputFile="root://eoscms//eos/cms"+PUInputFile
01106                     #Copy the file locally
01107                     self.logh.write("Copying the file %s locally to %s\n"%(PUInputFile,PUInputName))
01108                     self.logh.flush()
01109                     GetPUInput=subprocess.Popen("%s %s %s"%(copycmd,PUInputFile,PUInputName), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
01110                     GetPUInputExitCode=GetPUInput.wait()
01111                     #Allow even the potential copy of a local file (even one already named INPUT_PILEUP_EVENTS.root!)
01112                     if GetPUInputExitCode:
01113                         self.logh.write("The copying of the pile-up input file returned a non-zero exit code: %s \nThis is the stdout+stderr if the command:\n%s\n"%(GetPUInputExitCode,GetPUInput.stdout))
01114                 #Ultimately accept the case of the file being already there and not being specified in the --PUInputFile option
01115                 if not os.path.exists(PUInputName):
01116                     self.logh.write("The necessary INPUT_PILEUP_EVENTS.root file was not found in the working directory %s\nExiting now!"%perfsuitedir)
01117                     self.logh.flush()
01118                     sys.exit(1)
01119                 else:
01120                     #Set up here the DIGI PILE UP options
01121                     self.printFlush("Some PILE UP tests will be run!")
01122                     #Actually setting them earlier... when handling options... May not need this else after all... or just as a log entry.
01123                     self.printFlush("cmsdriverPUOptions is %s"%cmsdriverPUOptions)
01124                     pass
01125             
01126             #TimeSize tests:
01127             if TimeSizeEvents > 0:
01128                TimeSizeTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
01129                TimerInfo.update({'TimeSize':{'TotalTime':TimeSizeTime}}) #Add the TimeSize timer to the dictionary
01130                if TimeSizeCandles:
01131                   self.logh.write("Launching the TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
01132                   NoPileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
01133                   TimerInfo['TimeSize'].update({'NoPileUpTime':NoPileUpTime}) #Add the TimeSize No Pile Up tests timer to the list
01134                   self.printDate()
01135                   self.logh.flush()
01136                   ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizeCandles,cmsdriverOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
01137                   FinalExitCode=FinalExitCode+ReportExit
01138                   #Adding a time stamp here to parse for performance suite running time data
01139                   self.printFlush("Regular TimeSize tests were finished at %s"%(self.getDate()))
01140                   NoPileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
01141                
01142                #Launch eventual Digi Pile Up TimeSize too:
01143                if TimeSizePUCandles:
01144                   self.logh.write("Launching the PILE UP TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
01145                   PileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
01146                   TimerInfo['TimeSize'].update({'PileUpTime':PileUpTime}) #Add the TimeSize Pile Up tests timer to the list
01147                   self.printDate()
01148                   self.logh.flush()
01149                   ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizePUCandles,cmsdriverPUOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
01150                   FinalExitCode=FinalExitCode+ReportExit
01151                   #Adding a time stamp here to parse for performance suite running time data
01152                   self.printFlush("Pileup TimeSize tests were finished at %s"%(self.getDate()))
01153                   PileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
01154                   
01155                #Warn when TimeSize events were requested but no candle for regular or pileup tests was selected:
01156                if not (TimeSizeCandles or TimeSizePUCandles):
01157                   self.printFlush("A number of events (%s) for TimeSize tests was selected, but no candle for regular or pileup tests was selected!"%(TimeSizeEvents))
01158                #Adding a time stamp here to parse for performance suite running time data
01159                self.printFlush("All TimeSize tests were finished at %s"%(self.getDate()))
01160                TimeSizeTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
01161             
01162             #Stopping all cmsScimark jobs and analysing automatically the logfiles
01163             #No need to waste CPU while the load does not affect Valgrind measurements!
01164             if not (self._unittest or self._noexec):
01165                 self.logh.write("Stopping all cmsScimark jobs now\n")
01166                 subcmd = "cd %s ; %s" % (perfsuitedir,self.AuxiliaryScripts[2])
01167                 stopcmd = "sh -c \"%s\"" % subcmd
01168                 self.printFlush(stopcmd)
01169                 #os.popen(stopcmd)
01170                 #Obsolete popen4-> subprocess.Popen
01171                 #self.printFlush(os.popen4(stopcmd)[1].read())
01172                 self.printFlush(subprocess.Popen(stopcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read())
01173 
01174             #From here on we can use all available cores to speed up the performance suite remaining tests:
01175             if cores==0: #When specifying the cpu to run the suite on, one has to set cores to 0 to avoid threading of PerfSuite itself...
01176                                           #So we need to catch this case for the IB tests case where we assign the test to a specific cpu.
01177                 AvailableCores=cpus
01178             elif len(cpu_list) == cores: # For the new relval case, when running all the tests on one machine,
01179                                          # specifying the same number of cores and cpus (like: --cores 3, --cpu 3,4,5)
01180                 AvailableCores=cpus
01181             else:
01182                 AvailableCores=range(cores)
01183                 
01184             #Initialize a list that will contain all the simpleGenReport keyword arguments (1 dictionary per test):
01185             TestsToDo=[]
01186             #IgProf tests:
01187             if IgProfEvents > 0:
01188                if IgProfCandles:
01189                    self.printFlush("Preparing IgProf tests")
01190                    #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
01191                    #So in general we want to be able to split the perf and mem tests...
01192                    #For the case of the --profilers option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
01193                    if profilers:
01194                       self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
01195                       #Prepare the simpleGenReport arguments for this test:
01196                       IgProfProfilerArgs={
01197                          'perfdir':perfsuitedir,
01198                          'NumEvents':IgProfEvents,
01199                          'candles':IgProfCandles,
01200                          'cmsdriverOptions':cmsdriverOptions,
01201                          'stepOptions':stepOptions,
01202                          'Name':"IgProf",
01203                          'profilers':profilers,
01204                          'bypasshlt':bypasshlt,
01205                          'userInputRootFiles':userInputFile
01206                          }
01207                       #Append the test to the TestsToDo list:
01208                       TestsToDo.append(IgProfProfilerArgs)
01209                       self.printFlush("Appended IgProf test with profiler option %s to the TestsToDo list"%profilers)
01210                    #For the default case (4,5,6,7) we split the tests into 2 jobs since they naturally are 2 cmsRun jobs and for machines with many cores this will
01211                    #make the performance suite run faster.
01212                    else:
01213                       self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
01214                       ##PERF##
01215                       #Prepare the simpleGenReport arguments for this test:
01216                       IgProfPerfArgs={
01217                          'perfdir':perfsuitedir,
01218                          'NumEvents':IgProfEvents,
01219                          'candles':IgProfCandles,
01220                          'cmsdriverOptions':cmsdriverOptions,
01221                          'stepOptions':stepOptions,
01222                          'Name':"IgProf_Perf",
01223                          'profilers':profilers,
01224                          'bypasshlt':bypasshlt,
01225                          'userInputRootFiles':userInputFile
01226                          }
01227                       #Append the test to the TestsToDo list:
01228                       TestsToDo.append(IgProfPerfArgs)
01229                       self.printFlush("Appended IgProf PERF test to the TestsToDo list")
01230                       ##MEM##
01231                       #Prepare the simpleGenReport arguments for this test:
01232                       IgProfMemArgs={
01233                          'perfdir':perfsuitedir,
01234                          'NumEvents':IgProfEvents,
01235                          'candles':IgProfCandles,
01236                          'cmsdriverOptions':cmsdriverOptions,
01237                          'stepOptions':stepOptions,
01238                          'Name':"IgProf_Mem",
01239                          'profilers':profilers,
01240                          'bypasshlt':bypasshlt,
01241                          'userInputRootFiles':userInputFile
01242                          }
01243                       #Append the test to the TestsToDo list:
01244                       TestsToDo.append(IgProfMemArgs)
01245                       self.printFlush("Appended IgProf MEM test to the TestsToDo list")
01246                 #The following will be handled in the while loop that handles the starting of the threads:
01247                 #ReportExit=self.simpleGenReport(cpus,perfsuitedir,IgProfEvents,IgProfCandles,cmsdriverOptions,stepOptions,"IgProf",profilers,bypasshlt,userInputFile)
01248                 #FinalExitCode=FinalExitCode+ReportExit
01249                 #Launch eventual Digi Pile Up IgProf too:
01250                if IgProfPUCandles:
01251                    self.printFlush("Preparing IgProf PileUp tests")
01252                    #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
01253                    #So in general we want to be able to split the perf and mem tests...
01254                    #For the case of the --profilers option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
01255                    if profilers:
01256                       self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
01257                       #Prepare the simpleGenReport arguments for this test:
01258                       IgProfProfilerPUArgs={
01259                          'perfdir':perfsuitedir,
01260                          'NumEvents':IgProfEvents,
01261                          'candles':IgProfPUCandles,
01262                          'cmsdriverOptions':cmsdriverPUOptions,
01263                          'stepOptions':stepOptions,
01264                          'Name':"IgProf",
01265                          'profilers':profilers,
01266                          'bypasshlt':bypasshlt,
01267                          'userInputRootFiles':userInputFile
01268                          }
01269                       #Append the test to the TestsToDo list:
01270                       TestsToDo.append(IgProfProfilerPUArgs)
01271                       self.printFlush("Appended IgProf PileUp test with profiler option %s to the TestsToDo list"%profilers)
01272                    else:
01273                       self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
01274                       ##PERF##
01275                       #Prepare the simpleGenReport arguments for this test:
01276                       IgProfPerfPUArgs={
01277                          'perfdir':perfsuitedir,
01278                          'NumEvents':IgProfEvents,
01279                          'candles':IgProfPUCandles,
01280                          'cmsdriverOptions':cmsdriverPUOptions,
01281                          'stepOptions':stepOptions,
01282                          'Name':"IgProf_Perf",
01283                          'profilers':profilers,
01284                          'bypasshlt':bypasshlt,
01285                          'userInputRootFiles':userInputFile
01286                          }
01287                       #Append the test to the TestsToDo list:
01288                       TestsToDo.append(IgProfPerfPUArgs)
01289                       self.printFlush("Appended IgProf MEM PileUp test to the TestsToDo list")
01290                       ##MEM##
01291                       #Prepare the simpleGenReport arguments for this test:
01292                       IgProfMemPUArgs={
01293                          'perfdir':perfsuitedir,
01294                          'NumEvents':IgProfEvents,
01295                          'candles':IgProfPUCandles,
01296                          'cmsdriverOptions':cmsdriverPUOptions,
01297                          'stepOptions':stepOptions,
01298                          'Name':"IgProf_Mem",
01299                          'profilers':profilers,
01300                          'bypasshlt':bypasshlt,
01301                          'userInputRootFiles':userInputFile
01302                          }
01303                       #Append the test to the TestsToDo list:
01304                       TestsToDo.append(IgProfMemPUArgs)
01305                       self.printFlush("Appended IgProf MEM PileUp test to the TestsToDo list")
01306                if not (IgProfCandles or IgProfPUCandles):
01307                    self.printFlush("A number of events (%s) for IgProf tests was selected, but no candle for regular or pileup tests was selected!"%(IgProfEvents))
01308                
01309                     
01310             #Valgrind tests:
01311             if CallgrindEvents > 0:
01312                if CallgrindCandles:
01313                   self.printFlush("Preparing Callgrind tests")
01314                   CallgrindArgs={
01315                      'perfdir':perfsuitedir,
01316                      'NumEvents':CallgrindEvents,
01317                      'candles':CallgrindCandles,
01318                      'cmsdriverOptions':cmsdriverOptions,
01319                      'stepOptions':stepOptions,
01320                      'Name':"Callgrind",
01321                      'profilers':profilers,
01322                      'bypasshlt':bypasshlt,
01323                      'userInputRootFiles':userInputFile
01324                      }
01325                   #Append the test to the TestsToDo list:
01326                   TestsToDo.append(CallgrindArgs)
01327                   self.printFlush("Appended Callgrind test to the TestsToDo list")
01328                #Launch eventual Digi Pile Up Callgrind too:
01329                if CallgrindPUCandles:
01330                   self.printFlush("Preparing Callgrind PileUp tests")
01331                   CallgrindPUArgs={
01332                      'perfdir':perfsuitedir,
01333                      'NumEvents':CallgrindEvents,
01334                      'candles':CallgrindPUCandles,
01335                      'cmsdriverOptions':cmsdriverPUOptions,
01336                      'stepOptions':stepOptions,
01337                      'Name':"Callgrind",
01338                      'profilers':profilers,
01339                      'bypasshlt':bypasshlt,
01340                      'userInputRootFiles':userInputFile
01341                      }
01342                   #Append the test to the TestsToDo list:
01343                   TestsToDo.append(CallgrindPUArgs)
01344                   self.printFlush("Appended Callgrind PileUp test to the TestsToDo list")
01345                if not (CallgrindCandles or CallgrindPUCandles):
01346                   self.printFlush("A number of events (%s) for Callgrind tests was selected, but no candle for regular or pileup tests was selected!"%(CallgrindEvents))
01347                   
01348             if MemcheckEvents > 0:
01349                if MemcheckCandles:
01350                   self.printFlush("Preparing Memcheck tests")
01351                   MemcheckArgs={
01352                      'perfdir':perfsuitedir,
01353                      'NumEvents':MemcheckEvents,
01354                      'candles':MemcheckCandles,
01355                      'cmsdriverOptions':cmsdriverOptions,
01356                      'stepOptions':stepOptions,
01357                      'Name':"Memcheck",
01358                      'profilers':profilers,
01359                      'bypasshlt':bypasshlt,
01360                      'userInputRootFiles':userInputFile
01361                      }
01362                   #Append the test to the TestsToDo list:
01363                   TestsToDo.append(MemcheckArgs)
01364                   self.printFlush("Appended Memcheck test to the TestsToDo list")
01365                #Launch eventual Digi Pile Up Memcheck too:
01366                if MemcheckPUCandles:
01367                   self.printFlush("Preparing Memcheck PileUp tests")
01368                   MemcheckPUArgs={
01369                      'perfdir':perfsuitedir,
01370                      'NumEvents':MemcheckEvents,
01371                      'candles':MemcheckPUCandles,
01372                      'cmsdriverOptions':cmsdriverPUOptions,
01373                      'stepOptions':stepOptions,
01374                      'Name':"Memcheck",
01375                      'profilers':profilers,
01376                      'bypasshlt':bypasshlt,
01377                      'userInputRootFiles':userInputFile
01378                      }
01379                   #Append the test to the TestsToDo list:
01380                   TestsToDo.append(MemcheckPUArgs)  
01381                   self.printFlush("Appended Memcheck PileUp test to the TestsToDo list")
01382                if not (MemcheckCandles or MemcheckPUCandles):
01383                   self.printFlush("A number of events (%s) for Memcheck tests was selected, but no candle for regular or pileup tests was selected!"%(MemcheckEvents))
01384                   
01385             #Here if there are any IgProf, Callgrind or MemcheckEvents to be run,
01386             #run the infinite loop that submits the PerfTest() threads on the available cores:
01387             if IgProfEvents or CallgrindEvents or MemcheckEvents:
01388                #FIXME: We should consider what behavior makes most sense when the --cores option is used; at this time only the cores=0 case is considered...
01389                self.printFlush("Threading all remaining tests on all %s available cores!"%len(AvailableCores))
01390                self.printDate()
01391                self.logh.flush()
01392                #Save the original AvailableCores list to use it as a test to break the infinite loop:
01393                #While in the regular RelVal use-case it makes sense to use the actual number of cores of the machines, in
01394                #the IB case the AvailableCores will always consist of only 1 single core..
01395                OriginalAvailableCores=list(AvailableCores) #Tricky list copy bug! without the list() OriginalAvalaibleCores would point to AvailableCores!
01396                #Print this out in the log for debugging reasons
01397                self.printFlush("Original available cores list: %s"%AvailableCores)
01398 
01399                #Create a dictionary to keep track of running threads on the various cores:
01400                activePerfTestThreads={}
01401                #Flag for waiting messages:
01402                Waiting=False
01403                while 1:
01404                   #Check if there are tests to run:
01405                   if TestsToDo:
01406                      #Using the Waiting flag to avoid writing this message every 5 seconds in the case
01407                      #of having more tests to do than available cores...
01408                      if not Waiting:
01409                         self.printFlush("Currently %s tests are scheduled to be run:"%len(TestsToDo))
01410                         self.printFlush(TestsToDo)
01411                      #Check the available cores:
01412                      if AvailableCores:
01413                         #Set waiting flag to False since we'll be doing something
01414                         Waiting=False
01415                         self.printFlush("There is/are %s core(s) available"%len(AvailableCores))
01416                         cpu=AvailableCores.pop()
01417                         self.printFlush("Let's use cpu %s"%cpu)
01418                         simpleGenReportArgs=TestsToDo.pop()
01419                         self.printFlush("Let's submit %s test on core %s"%(simpleGenReportArgs['Name'],cpu))
01420                         #Adding a Total timer for each of the threaded tests:
01421                         if simpleGenReportArgs['Name'] not in TimerInfo.keys():
01422                            #if 'TotalTime' not in TimerInfo[simpleGenReportArgs['Name']].keys():
01423                            self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
01424                            TimerInfo.update({simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add the TimeSize timer to the dictionary 
01425                         threadToDo=self.simpleGenReportThread(cpu,self,**simpleGenReportArgs) #Need to send self too, so that the thread has access to the PerfSuite.simpleGenReport() function
01426                         self.printFlush("Starting thread %s"%threadToDo)
01427                         ReportExitCode=threadToDo.start()
01428                         self.printFlush("Adding thread %s to the list of active threads"%threadToDo)
01429                         activePerfTestThreads[cpu]=threadToDo
01430                      #If there is no available core, pass, there will be some checking of activeThreads, a little sleep and then another check.
01431                      else:
01432                         pass
01433                   #Check the activePerfTestThreads dictionary:
01434                   activeTestNames=[]
01435                   activeTestNamesPU=[]
01436                   for cpu in activePerfTestThreads.keys():
01437                      if activePerfTestThreads[cpu].isAlive():
01438                         #print "%% cpu %s activerPerfTestThreads[cpu] %s activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'] %s"%(cpu,activePerfTestThreads[cpu],activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'])
01439                         if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
01440                            activeTestNamesPU.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
01441                         else:
01442                            activeTestNames.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
01443                         pass
01444                      elif cpu not in AvailableCores:
01445                         #Set waiting flag to False since we'll be doing something
01446                         Waiting=False
01447                         self.printFlush(time.ctime())
01448                         self.printFlush("%s test, in thread %s is done running on core %s"%(activePerfTestThreads[cpu].simpleGenReportArgs['Name'],activePerfTestThreads[cpu],cpu) )
01449                         self.printFlush("About to append cpu %s to AvailableCores list"%cpu)
01450                         AvailableCores.append(cpu)
01451                         #Eliminate from activeTestNames lists:
01452                         #print activeTestNames
01453                         #print activeTestNamesPU
01454                         #print activePerfTestThreads[cpu].simpleGenReportArgs['Name']
01455                         if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
01456                            try:
01457                               activeTestNamesPU.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
01458                            except:
01459                               pass
01460                         else:
01461                            try:
01462                               activeTestNames.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
01463                            except:
01464                               pass
01465                         #Eliminate also from activePerfTestThreads dictionary:
01466                         activePerfTestThreads.pop(cpu)
01467                         #FIXME:
01468                         #Delicate check to stop the timer on the individual threaded test!
01469                         #Need to think about it still...
01470                   #FIXME:
01471                   #Delicate check to stop the timers on the threaded tests:
01472                   #Check activePerfTestThreads dictionary for "Name" if any name is missing, the total can be stopped for that name.
01473                   #self.PerfTestTotalTimer
01474                   for TestName in ["IgProf_Perf","IgProf_Mem","Memcheck","Valgrind"]:
01475                      if (TestName not in activeTestNames) and (TestName not in activeTestNamesPU) :
01476                         try:
01477                            TimerInfo[TestName]['TotalTime'].set_end(datetime.datetime.now())
01478                         except:
01479                            #print "No %s test was running"%TestName
01480                            pass
01481                   #Buggy if... it seems we don't wait for the running thread to be finished...
01482                   #We should request:
01483                   #-All OriginalAvailableCores should be actually available.
01484                   if not AvailableCores==[] and (set(AvailableCores)==set(range(cmsCpuInfo.get_NumOfCores())) or set(AvailableCores)==set(OriginalAvailableCores)) and not TestsToDo:
01485                      self.printFlush("PHEW! We're done... all TestsToDo are done... at %s "%(self.getDate()))
01486                      #Debug printouts:
01487                      #print "AvailableCores",AvailableCores
01488                      #print "set(AvailableCores)",set(AvailableCores)
01489                      #print "set(range(cmsCpuInfo.get_NumOfCores())",set(range(cmsCpuInfo.get_NumOfCores()))
01490                      #print "OriginalAvailableCores",OriginalAvailableCores
01491                      #print "set(OriginalAvailableCores)",set(OriginalAvailableCores)                                   
01492                      #print "TestsToDo",TestsToDo
01493                      break
01494                   else:
01495                      #Putting the sleep statement first to avoid writing Waiting... before the output of the started thread reaches the log... 
01496                      time.sleep(5)
01497                      #Use the Waiting flag to write a single waiting message instead of one message every 5 seconds...
01498                      if not Waiting:
01499                         self.printFlush(time.ctime())
01500                         self.printFlush("Waiting for tests to be done...")
01501                         sys.stdout.flush()
01502                         Waiting=True
01503             #End of the if for IgProf, Callgrind, Memcheck tests      
01504                   
01505             if benching and not (self._unittest or self._noexec):
01506                 #Ending the performance suite with the cmsScimark benchmarks again:
01507                 for cpu in cpus:
01508                     if cmsScimark > 0:
01509                         self.logh.write("Ending with %s cmsScimark on cpu%s\n"         % (cmsScimark,cpu))
01510                         cmsScimarkFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
01511                         TimerInfo['cmsScimarkTime'].update({'cmsScimarkFinal':cmsScimarkFinalTime}) #Add the cmsScimarkFinalTime information to the general TimerInfo dictionary
01512 
01513                         self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
01514                         cmsScimarkFinalTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Initial timer
01515                     if cmsScimarkLarge > 0:
01516                         self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
01517                         cmsScimarkLargeFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLargePerfSuiteTimer
01518                         TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeFinal':cmsScimarkLargeFinalTime}) #Add the cmsScimarkLargeFinalTime information to the general TimerInfo dictionary
01519                         self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge,large=True)
01520                         cmsScimarkLargeFinalTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Initial timer
01521     
01522             if prevrel:
01523                 self.logh.write("Running the regression analysis with respect to %s\n"%getVerFromLog(prevrel))
01524                 self.logh.write(time.ctime(time.time()))
01525                 self.logh.flush()
01526                 
01527                 crr.regressReports(prevrel,os.path.abspath(perfsuitedir),oldRelName = getVerFromLog(prevrel),newRelName=self.cmssw_version)
01528     
01529             #Create a tarball of the work directory
01530             if tarball:
01531                tarballTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the tarball PerfSuiteTimer
01532                TimerInfo.update({'tarballTime':{'TotalTime':tarballTime}})
01533                # Adding the str(stepOptions to distinguish the tarballs for 1 release
01534                # (GEN->DIGI, L1->RECO will be run in parallel)
01535                
01536                # Cleaning the stepOptions from the --usersteps=:
01537                if "=" in str(stepOptions):
01538                   fileStepOption=str(stepOptions).split("=")[1]
01539                else:
01540                   fileStepOption=str(stepOptions)
01541                if fileStepOption=="":
01542                   fileStepOption="UnknownStep"
01543                # Add the working directory used to avoid overwriting castor files (also put a check...)
01544                fileWorkingDir=os.path.basename(perfsuitedir)
01545                
01546                # Also add the --conditions and --eventcontent options used in the --cmsdriver options since it
01547                # is possible that the same tests will be run with different conditions and/or event content:               
01548                # Parse it out of --cmsdriver option:
01549                fileEventContentOption="UnknownEventContent"
01550                fileConditionsOption="UnknownConditions"
01551                for token in cmsdriverOptions.split("--"):
01552                   if token!='' and 'cmsdriver' not in token:
01553                      if "=" in token:
01554                         fileOption=token.split("=")[0]
01555                         fileOptionValue=token.split("=")[1].strip("'").strip('"')
01556                      else:
01557                         fileOption=token.split()[0]
01558                         fileOptionValue=token.split()[1].strip("'").strip('"')
01559                      if "eventcontent" or "conditions" in fileOption:
01560                         if "eventcontent" in fileOption:
01561                            fileEventContentOption=fileOptionValue
01562                         elif "conditions" in fileOption:
01563                            # check if we are using the autoCond style of flexible conditions
01564                            # if so, expand the condition here so that the file names contain the real conditions
01565                            if "auto:" in fileOptionValue: 
01566                               from Configuration.AlCa.autoCond import autoCond
01567                               fileConditionsOption = autoCond[ fileOptionValue.split(':')[1] ]
01568                            else:
01569                               # "old style" conditions, hardcoded values ...
01570                               # FIXME:
01571                               # Should put at least the convention in cmsPerfCommons to know how to parse it...
01572                               # Potential weak point if the conditions tag convention changes...
01573                               if "," in fileOptionValue: #Since 330, conditions don't have FrontierConditions_GlobalTag, in front of them anymore...
01574                                  fileConditionsOption=fileOptionValue.split("::")[0].split(",")[1] #"Backward" compatibility
01575                               else:
01576                                  fileConditionsOption=fileOptionValue.split("::")[0] 
01577                   else: # empty token
01578                      #print "Print this is the token: %s"%token
01579                      pass
01580                   
01581                #self.printFlush("Conditions label to add to the tarball name is %s"%fileConditionsOption)
01582                #self.printFlush("Eventcontent label to add to the tarball name is %s"%fileEventContentOption)
01583                      #FIXME:
01584                      #Could add the allowed event contents in the cmsPerfCommons.py file and use those to match in the command line options... This assumes maintenance of cmsPerfCommons.py
01585 
01586 
01587                #Create a tarball with just the logfiles
01588                subprocess.Popen("ls -R | grep .root > rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
01589                LogFile = "%s_%s_%s_%s_%s_%s_%s_%s_log.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
01590                AbsTarFileLOG = os.path.join(perfsuitedir,LogFile)
01591                tarcmd  = "tar zcfX %s %s %s" %(AbsTarFileLOG, "rootFiles", os.path.join(perfsuitedir,"*"))
01592                self.printFlush("Creating a tarball for the logfiles")
01593                self.printFlush(tarcmd)
01594                self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
01595                self.printFlush(subprocess.Popen("rm rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())               
01596 
01597                fullcastorpathlog=os.path.join(castordir,LogFile)
01598 
01599 
01600                #Create the tarball with the contents of the directory + md5 checksum
01601                TarFile = "%s_%s_%s_%s_%s_%s_%s_%s.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
01602                AbsTarFile = os.path.join(perfsuitedir,TarFile)
01603                tarcmd  = "tar -zcf %s %s" %(AbsTarFile, os.path.join(perfsuitedir,"*"))
01604                md5cmd = "md5sum %s" %(AbsTarFile)
01605                self.printFlush("Creating a tarball with the content of the directory")               
01606                self.printFlush(tarcmd)
01607                self.printFlush(md5cmd)               
01608                #FIXME:
01609                #Anything that will be logged after the tar command below will not enter the cmsPerfSuite.log in the tarball (by definition)...
01610                #To remain backward compatible the harvesting script needs to be based on the command above to identify the tarball location.
01611                #Obsolete popen4-> subprocess.Popen
01612                #self.printFlush(os.popen3(tarcmd)[2].read()) #Using popen3 to get only stderr we don't want the whole stdout of tar!
01613                self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
01614                md5sum = subprocess.Popen(md5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read().split()[0]
01615                self.printFlush("The md5 checksum of the tarball: %s" %(md5sum))
01616                AbsTarFileMD5 = AbsTarFile + ".md5"
01617                md5filecmd = "echo %s > %s" % (md5sum, AbsTarFileMD5)
01618                self.printFlush(subprocess.Popen(md5filecmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
01619                                                 
01620                #Archive it on CASTOR
01621                #Before archiving check if it already exist if it does print a message, but do not overwrite, so do not delete it from local dir:
01622                fullcastorpathfile=os.path.join(castordir,TarFile)
01623                fullcastorpathmd5=os.path.join(castordir,TarFile + ".md5")
01624                
01625                checkcastor="nsls  %s" % fullcastorpathfile
01626                #Obsolete os.popen-> subprocess.Popen                
01627                #checkcastorout=os.popen3(checkcastor)[1].read()
01628                checkcastorout=subprocess.Popen(checkcastor,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read()                
01629                if checkcastorout.rstrip()==fullcastorpathfile:
01630                   castorcmdstderr="File %s is already on CASTOR! Will NOT OVERWRITE!!!"%fullcastorpathfile
01631                else:
01632                   #Switching from CASTOR TO EOS, i.e. rfcp to xrdcp!
01633                   #Not YET!!!
01634                   #FIXME! Migrate to EOS eventually, taking into account implications for PerfDB logs linking!
01635                   castorcmd="rfcp %s %s" % (AbsTarFile,fullcastorpathfile)
01636                   castormd5cmd="rfcp %s %s" % (AbsTarFileMD5,fullcastorpathmd5)
01637                   castorlogcmd="rfcp %s %s" % (AbsTarFileLOG,fullcastorpathlog)
01638                   self.printFlush(castorcmd)
01639                   self.printFlush(castormd5cmd)
01640                   self.printFlush(castorlogcmd)
01641                   #Obsolete os.popen-> subprocess.Popen
01642                   #castorcmdstderr=os.popen3(castorcmd)[2].read()
01643                   castorcmdstderr=subprocess.Popen(castorcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
01644                   subprocess.Popen(castormd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
01645                   subprocess.Popen(castorlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()                  
01646                #Checking the stderr of the rfcp command to copy the tarball (.tgz) on CASTOR:
01647                if castorcmdstderr:
01648                    #If it failed print the stderr message to the log and tell the user the tarball (.tgz) is kept in the working directory
01649                    self.printFlush(castorcmdstderr)
01650                    self.printFlush("Since the CASTOR archiving for the tarball failed the file %s is kept in directory %s"%(TarFile, perfsuitedir))
01651                else:
01652                    #If it was successful then remove the tarball from the working directory:
01653                    self.printFlush("Successfully archived the tarball %s in CASTOR!"%(TarFile))
01654                    self.printFlush("The tarball can be found: %s"%(fullcastorpathfile))
01655                    self.printFlush("The logfile can be found: %s"%(fullcastorpathlog))                   
01656                    self.printFlush("Deleting the local copy of the tarballs")
01657                    rmtarballcmd="rm -Rf %s"%(AbsTarFile)
01658                    rmtarballmd5cmd="rm -Rf %s"%(AbsTarFileMD5)
01659                    rmtarballlogcmd="rm -Rf %s"%(AbsTarFileLOG)
01660                    self.printFlush(rmtarballcmd)
01661                    self.printFlush(rmtarballmd5cmd)
01662                    self.printFlush(rmtarballlogcmd)                   
01663                    #Obsolete os.popen-> subprocess.Popen
01664                    #self.printFlush(os.popen4(rmtarballcmd)[1].read())
01665                    self.printFlush(subprocess.Popen(rmtarballcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
01666                    self.printFlush(subprocess.Popen(rmtarballmd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
01667                    self.printFlush(subprocess.Popen(rmtarballlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
01668                tarballTime.set_end(datetime.datetime.now())
01669             else:
01670                self.printFlush("Performance Suite directory will not be archived in a tarball since --no_tarball option was chosen")
01671 
01672             #End of script actions!
01673     
01674             #Print a time stamp at the end:
01675             date=time.ctime(time.time())
01676             self.logh.write("Performance Suite finished running at %s on %s in directory %s\n" % (date,self.host,path))
01677             if self.ERRORS == 0:
01678                 self.logh.write("There were no errors detected in any of the log files!\n")
01679             else:
01680                 self.logh.write("ERROR: There were %s errors detected in the log files, please revise!\n" % self.ERRORS)
01681                 #print "No exit code test"
01682                 #sys.exit(1)
01683         except exceptions.Exception, detail:
01684            self.logh.write(str(detail) + "\n")
01685            self.logh.flush()
01686            if not self.logh.isatty():
01687               self.logh.close()
01688            raise
01689         #Add the possibility to send as an email the execution logfile to the user and whoever else interested:
01690         if MailLogRecipients != "": #Basically leave the option to turn it off too.. --mail ""
01691            self.printFlush("Sending email notification for this execution of the performance suite with command:")
01692            sendLogByMailcmd='cat cmsPerfSuite.log |mail -s "Performance Suite finished running on %s" '%self.host + MailLogRecipients
01693            self.printFlush(sendLogByMailcmd)
01694            #Obsolete os.popen-> subprocess.Popen
01695            #self.printFlush(os.popen4(sendLogByMailcmd)[1].read())
01696            self.printFlush(subprocess.Popen(sendLogByMailcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
01697         else:
01698            self.printFlush('No email notification will be sent for this execution of the performance suite since option --mail "" was used')
01699         
01700         TotalTime.set_end(datetime.datetime.now())        
01701         self.printFlush("Total Running Time\t%s hrs (%s mins)"%(TotalTime.get_duration()['hours'],TotalTime.get_duration()['minutes']))
01702 
01703         #Dump of the TimerInfo information
01704         #First dump it as a pickle file...
01705         #Well in order to do so without the complication of serializing a custom class instance need to make the dictionary fully string-made:
01706         TimerInfoStr={}
01707         PerfSuiteTimerInfo=open("PerfSuiteTimerInfo.pkl","wb")
01708         #pickle.dump(TimerInfo,PerfSuiteTimerInfo)
01709         #PerfSuiteTimerInfo.close()
01710         #For now print it at the bottom of the log:
01711         self.logh.write("Test type\tActual Test\tDuration\tStart Time\tEnd Time\n")
01712         for key in TimerInfo.keys():
01713            #self.printFlush(key)
01714            TimerInfoStr.update({key:{}})
01715            for test in TimerInfo[key].keys():
01716               TimerInfoStr[key].update({test:[str(TimerInfo[key][test].get_duration()['hours'])+" hrs ("+str(TimerInfo[key][test].get_duration()['minutes'])+" mins)",TimerInfo[key][test].get_start(),TimerInfo[key][test].get_end()]})
01717               self.logh.write(key+"\t"+test+"\t")
01718               self.logh.write("%s hrs (%s mins)\t"%(TimerInfo[key][test].get_duration()['hours'],TimerInfo[key][test].get_duration()['minutes']))
01719               self.logh.write("%s\t"%TimerInfo[key][test].get_start())
01720               self.logh.write("%s\n"%TimerInfo[key][test].get_end())
01721         pickle.dump(TimerInfoStr,PerfSuiteTimerInfo) 
01722         PerfSuiteTimerInfo.close()
01723         
01724         self.logh.write("Final Performance Suite exit code was %s"%FinalExitCode)
01725         self.logh.flush()
01726         sys.exit(FinalExitCode)
01727 
def _openMainLogfile(logfile):
    """Return a writable file object for the top-level performance suite log.

    logfile -- path of the requested logfile, or None when no logfile was
               requested.

    When logfile is None, or when the file cannot be opened, sys.stdout is
    returned so that the caller always has something to write to.
    If a file with the requested name already exists it is first renamed to
    the same name with a timestamp appended, and a note about it is written
    at the top of the new logfile.
    """
    if logfile is None:
        return sys.stdout

    notes = []
    if os.path.exists(logfile):
        #Move old logfile to a file with the same filename plus a timestamp appended
        oldlogfile = logfile + "_" + time.strftime("%d-%m-%Y_%H:%M:%S")
        try:
            #os.rename instead of a shell "mv": no quoting problems with odd file names
            os.rename(logfile, oldlogfile)
        except OSError as detail:
            notes.append("Please check what happened: A file named %s existed already and the attempt to move it to %s produced the following output: %s\n" % (logfile, oldlogfile, detail))
        else:
            notes.append("***WARNING! A file named %s existed already!\n***It has been moved to %s before starting the current logfile!\n" % (logfile, oldlogfile))

    try:
        handle = open(logfile, "w")
    except (OSError, IOError) as detail:
        #There is no logfile to write the failure to, so report it on stdout instead
        handle = sys.stdout
        notes.append("Failed to open the intended logfile %s, detail error:\n%s\n" % (logfile, detail))

    for note in notes:
        handle.write(note)
    handle.flush()
    return handle


def main(argv=[__name__]):
    """Command line entry point of the performance suite.

    argv -- list of command line arguments, handed to PerfSuite.optionParse().
            Each option (and each option value) must be its own list element,
            e.g. main(["--cmsScimark", 10]); a single string holding all the
            options is not valid. The default list is never modified here.

    The parsed options are logged together with the invoking command line.
    With more than one entry in 'cpus' one PerfThread is started per cpu, each
    with its own "cpu_<N>" sub-directory of 'perfsuitedir' and its own logfile,
    and this function waits for all of them to finish. Otherwise the suite is
    run directly through PerfSuite.runPerfSuite(), which ends with sys.exit().

    Raises ValueError when optionParse() does not return the expected number
    of values.
    """
    #Let's instantiate the class:
    suite = PerfSuite()

    #Names of the values returned by optionParse(), in the order it returns them:
    argNames = ('create',
                'castordir',
                'TimeSizeEvents',
                'IgProfEvents',
                'CallgrindEvents',
                'MemcheckEvents',
                'cmsScimark',
                'cmsScimarkLarge',
                'cmsdriverOptions',
                'cmsdriverPUOptions',
                'stepOptions',
                'quicktest',
                'profilers',
                'cpus',
                'cores',
                'prevrel',
                'bypasshlt',
                'runonspare',
                'perfsuitedir',
                'logfile',
                'TimeSizeCandles',
                'IgProfCandles',
                'CallgrindCandles',
                'MemcheckCandles',
                'TimeSizePUCandles',
                'IgProfPUCandles',
                'CallgrindPUCandles',
                'MemcheckPUCandles',
                'PUInputFile',
                'userInputFile',
                'MailLogRecipients',
                'tarball')
    argValues = tuple(suite.optionParse(argv))
    if len(argValues) != len(argNames):
        #zip() would silently truncate, so keep the strictness of a tuple unpacking
        raise ValueError("optionParse returned %s values while %s were expected" % (len(argValues), len(argNames)))
    PerfSuiteArgs = dict(zip(argNames, argValues))

    if PerfSuiteArgs['create']: # Before anything, request the AFS volume (it takes some time...)
        suite.createIgVolume()

    #Always bound: falls back to sys.stdout when there is no usable logfile
    ActualLogfile = _openMainLogfile(PerfSuiteArgs['logfile'])
    try:
        #Add the exact command line used to call the performance suite directly in the log.
        ActualLogfile.write("Performance suite invoked with command line:\n")
        ActualLogfile.write(" ".join(sys.argv) + "\n")
        ActualLogfile.flush()

        #Debug printout that we could silence...
        ActualLogfile.write("Initial PerfSuite Arguments:\n")
        for key in PerfSuiteArgs.keys():
            ActualLogfile.write("%s %s\n" % (key, PerfSuiteArgs[key]))
        ActualLogfile.flush()

        PerfSuiteArgs['cpu_list'] = PerfSuiteArgs['cpus'] #To access the actual number of cpus used inside the threads..

        #Handle in here the case of multiple cores and the loading of cores with cmsScimark:
        if len(PerfSuiteArgs['cpus']) > 1:
            ActualLogfile.write("More than 1 cpu: threading the Performance Suite!\n")
            outputdir = PerfSuiteArgs['perfsuitedir']
            cpus = PerfSuiteArgs['cpus']
            cores = PerfSuiteArgs['cores']
            #When every core is used for the tests (len(cpus) == cores) there is no spare core to load
            if PerfSuiteArgs['runonspare'] and len(cpus) != cores:
                cmsScimarkLaunch_pslist = {}
                for core in range(cores):
                    if core in cpus:
                        continue
                    ActualLogfile.write("Submitting cmsScimarkLaunch.csh to run on core cpu " + str(core) + "\n")
                    subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (outputdir, str(core))
                    command = "taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
                    ActualLogfile.write(command + "\n")
                    #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
                    #cpus so it makes no sense to try reading its stdout/err
                    #NOTE(review): the pipes are never read; confirm the launched script cannot fill them and block
                    cmsScimarkLaunch_pslist[core] = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    ActualLogfile.write("Spawned %s \n with PID %s\n" % (command, cmsScimarkLaunch_pslist[core].pid))
                    ActualLogfile.flush()
            PerfSuiteArgs['runonspare'] = False #Set it to false to avoid cmsScimark being spawned by each thread

            #Every thread logs in its own directory, keeping the requested logfile name if there is one
            if PerfSuiteArgs['logfile']:
                threadLogName = os.path.basename(PerfSuiteArgs['logfile'])
            else:
                threadLogName = "cmsPerfSuiteThread.log"

            suitethread = {}
            for cpu in cpus:
                #Make arguments "threaded" by setting for each instance of the suite:
                #1-A different output (sub)directory
                #2-Only 1 core on which to run
                #3-Automatically have a logfile... otherwise stdout is lost?
                #To be done:[3-A flag for Valgrind not to "thread" itself onto the other cores..]
                cpudir = os.path.join(outputdir, "cpu_%s" % cpu)
                if not os.path.exists(cpudir):
                    os.mkdir(cpudir)
                PerfSuiteArgs['perfsuitedir'] = cpudir
                PerfSuiteArgs['cpus'] = [cpu]  #Keeping the name cpus for now FIXME: change it to cpu in the whole code
                PerfSuiteArgs['logfile'] = os.path.join(cpudir, threadLogName)
                #Now spawn the thread (the ** call hands the thread its own copy of the arguments):
                suitethread[cpu] = PerfThread(**PerfSuiteArgs)
                ActualLogfile.write("Launching PerfSuite thread on cpu%s\n" % cpu)
                ActualLogfile.flush()
                suitethread[cpu].start()

            #Poll until every thread is done; KeyboardInterrupt/SystemExit simply propagate
            while any(thread.is_alive() for thread in suitethread.values()):
                time.sleep(5.0)
                sys.stdout.flush()
            ActualLogfile.write("All PerfSuite threads have completed!\n")
            ActualLogfile.flush()

        else: #No threading, just run the performance suite on the cpu core selected
            suite.runPerfSuite(**PerfSuiteArgs)
    finally:
        #Release the logfile handle, also when runPerfSuite leaves through sys.exit()
        if ActualLogfile is not sys.stdout:
            ActualLogfile.close()


if __name__ == "__main__":
    #Run as a script: hand the full command line to main()
    main(sys.argv)