
cmsPerfSuite.py
1 #!/usr/bin/env python
2 import os, time, sys, re, glob
3 import optparse as opt
4 import cmsRelRegress as crr
5 from cmsPerfCommons import Candles, KeywordToCfi, CandFname, cmsDriverPileUpOption, getVerFromLog
6 import cmsRelValCmd,cmsCpuInfo
7 import threading #Needed in threading use for Valgrind
8 import subprocess #Nicer subprocess management than os.popen
9 import datetime #Used to time the running of the performance suite
10 import pickle #Used to dump the running timing information
11 from functools import reduce
12 
13 #Redefine _cleanup() function not to poll active processes
14 #[This is necessary to avoid issues when threading]
15 #So let's have it do nothing:
16 def _cleanup():
17  pass
18 #Override the function in subprocess
19 subprocess._cleanup=_cleanup
20 
21 class PerfThread(threading.Thread):
22  def __init__(self,**args):
23  self.args=args
24  threading.Thread.__init__(self)
25  def run(self):
26  self.suite=PerfSuite() #Instantiate the PerfSuite whose runPerfSuite() this thread executes
27  #print "Arguments inside the thread instance:"
28  #print type(self.args)
29  #print self.args
30  self.suite.runPerfSuite(**(self.args))#self.args)
31 
33  """A class defining timing objects to time the running of the various parts of the performance suite. The class depends on module datetime."""
34  def __init__(self,start=None):
35  """Initialize the start time and set the end time to some indefinite time in the future"""
36  self.start = start
37  self.end = datetime.datetime.max
38  self.duration = self.end - self.start
39 
40  #Setters:
41  def set_start(self,start=None):
42  self.start = start
43  def set_end(self,end=None):
44  #print "Setting end time to %s"%end.ctime()
45  self.end = end
46  self.duration = self.end - self.start
47  #Getters
48  def get_start(self):
49  """Return the start time in ctime timestamp format"""
50  return self.start.ctime()
51  def get_end(self):
52  """Return the end time in ctime timestamp format"""
53  return self.end.ctime()
54  def get_duration(self):
55  """Return the duration between start and end as a dictionary with keys 'hours', 'minutes', 'seconds' to express the total duration in the favourite (most appropriate) unit. The function returns truncated integers."""
56  self.duration_seconds = self.duration.days*86400 + self.duration.seconds
57  self.duration_minutes = self.duration_seconds//60
58  self.duration_hours = self.duration_seconds//3600
59  return {'hours':self.duration_hours, 'minutes':self.duration_minutes, 'seconds':self.duration_seconds}
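The following is a minimal usage sketch of PerfSuiteTimer (illustrative only, not part of this file): a timer is started at construction, stopped with set_end(), and queried through the getters; time.sleep() simply stands in for the work being timed.

  import datetime, time

  timer = PerfSuiteTimer(start=datetime.datetime.now())  # start the timer
  time.sleep(2)                                          # stand-in for the work being timed
  timer.set_end(datetime.datetime.now())                 # stop the timer
  print(timer.get_start(), timer.get_end())              # ctime-formatted timestamps
  print(timer.get_duration())                            # e.g. {'hours': 0, 'minutes': 0, 'seconds': 2}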
60 
61 class PerfSuite:
62  def __init__(self):
63 
64  self.ERRORS = 0
65  #Switching from CASTOR to EOS (using xrdcp instead of rfcp and root://eoscms//eos/ instead of /castor/cern.ch/)
66  #NOT YET!
67  #FIX ME... do the migration to EOS eventually, taking care of PerFDB implications for tarball location!
68  self._CASTOR_DIR = "/castor/cern.ch/cms/store/relval/performance/"
69  self._dryrun = False
70  self._debug = False
71  self._unittest = False
72  self._noexec = False
73  self._verbose = True
74  self.logh = sys.stdout
75 
76  #Get some environment variables to use
77  try:
78  self.cmssw_arch = os.environ["SCRAM_ARCH"]
79  self.cmssw_version= os.environ["CMSSW_VERSION"]
80  self.host = os.environ["HOST"]
81  self.user = os.environ["USER"]
82  except KeyError:
83  self.logh.write('Error: one of the environment variables SCRAM_ARCH, CMSSW_VERSION, HOST or USER is not available.\n')
84  self.logh.write(' Please run eval `scramv1 runtime -csh` to set your environment variables\n')
85  self.logh.flush()
86  sys.exit()
87 
88  #Scripts used by the suite:
89  self.Scripts =["cmsDriver.py","cmsRelvalreport.py","cmsRelvalreportInput.py","cmsScimark2"]
90  self.AuxiliaryScripts=["cmsScimarkLaunch.csh","cmsScimarkParser.py","cmsScimarkStop.py"]
91 
92 
93  #Threading the execution of IgProf, Memcheck and Callgrind using the same model used to thread the whole performance suite:
94  #1-Define a class simpleGenReportThread() that has relevant methods needed to handle PerfTest()
95  #2-Instantiate one with the necessary arguments to run simpleGenReport on core N
96  #3-Execute its "run" method by starting the thread
97  #Simplest way maybe is to keep 2 global lists:
98  #AvailableCores
99  #TestsToDo
100  #PerfSuite will fill the TestsToDo list with dictionaries, to be used as keyword arguments to instantiate a relevant thread.
101  #Once all the TestsToDo are "scheduled" into the list (FirstInLastOut buffer since we use pop()) PerfSuite will look into the
102  #AvailableCores list and start popping cores onto which to instantiate the relevant threads, then it will start the thread,
103  #appending it to the activePerfTestThread{}, a dictionary with core as key and thread object as value, to facilitate bookkeeping.
104  #An infinite loop will take care of checking for AvailableCores as long as there are TestsToDo and keep submitting.
105  #In the same loop the activePerfTestThread{} will be checked for finished threads and it will re-append the relevant cpu back
106  #to the AvailableCores list.
107  #In the same loop a check for the case of all cores being back into AvailableCores with no more TestsToDo will break the infinite loop
108  #and declare the end of all tests. Otherwise, a 5 second sleep will delay the next iteration of the loop.
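The scheduling loop described above can be sketched as follows (a hedged illustration only, not code from this file; the AvailableCores and TestsToDo lists and the PerfSuite instance named suite are assumed to exist):

  import time

  activePerfTestThread = {}                          # core -> running simpleGenReportThread
  while True:
      # submit tests while both a pending test and a free core are available
      while TestsToDo and AvailableCores:
          core = AvailableCores.pop()
          kwargs = TestsToDo.pop()
          thread = suite.simpleGenReportThread(core, suite, **kwargs)
          thread.start()
          activePerfTestThread[core] = thread
      # reap finished threads and return their cores to the pool
      for core, thread in list(activePerfTestThread.items()):
          if not thread.is_alive():
              del activePerfTestThread[core]
              AvailableCores.append(core)
      # all cores free and nothing left to do: declare the end of all tests
      if not TestsToDo and not activePerfTestThread:
          break
      time.sleep(5)                                  # otherwise wait 5 seconds before checking again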
109 
110  def createIgVolume(self):
111  igcommand = '/afs/cern.ch/cms/sdt/internal/scripts/requestPerfIgprofSpace.py --version ' + self.cmssw_version + ' --platform ' + self.cmssw_arch
112  subprocess.Popen(igcommand,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
113 
114 
115  class simpleGenReportThread(threading.Thread):
116  def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs): #Passing around the perfsuite object to be able to access simpleGenReport
117  self.cpu=cpu
118  self.simpleGenReportArgs=simpleGenReportArgs
119  self.perfsuiteinstance=perfsuiteinstance
120  threading.Thread.__init__(self)
121  def run(self):
122  self.PerfTest=self.perfsuiteinstance.PerfTest(self.cpu,self.perfsuiteinstance,**(self.simpleGenReportArgs))
123  self.PerfTest.runPerfTest()
124 
125  class PerfTest:
126  def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs):
127  self.cpu=cpu
128  self.simpleGenReportArgs=simpleGenReportArgs
129  self.perfsuiteinstance=perfsuiteinstance
130  def runPerfTest(self):
131 # self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
132 # TimerInfo.update({self.simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add the TimeSize timer to the dictionary
133  if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
134  self.perfsuiteinstance.logh.write("Launching the PILE UP %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
135  self.PerfTestPUTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
136  TimerInfo[self.simpleGenReportArgs['Name']].update({'PileUpTime':self.PerfTestPUTimer}) #Add the TimeSize timer to the dictionary
137 
138  else:
139  self.perfsuiteinstance.logh.write("Launching the %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
140  self.PerfTestTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
141  TimerInfo[self.simpleGenReportArgs['Name']].update({'NoPileUpTime':self.PerfTestTimer}) #Add the TimeSize timer to the dictionary
142  self.perfsuiteinstance.logh.flush()
143  #Cut and paste in bulk, should see if this works...
144  self.perfsuiteinstance.printDate()
145  self.perfsuiteinstance.logh.flush()
146  self.exitcode=self.perfsuiteinstance.simpleGenReport([self.cpu],**(self.simpleGenReportArgs)) #Returning ReportExit code
147  #Stop the timers on the threaded PileUp and NoPileUp tests:
148  if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
149  self.PerfTestPUTimer.set_end(datetime.datetime.now())
150  else:
151  self.PerfTestTimer.set_end(datetime.datetime.now())
152  return self.exitcode
153 
154  #Options handling
155  def optionParse(self,argslist=None):
156  parser = opt.OptionParser(usage='''./cmsPerfSuite.py [options]
157 
158  Examples:
159 
160  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunDigiPileUp TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent FEVTDEBUGHLT --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
161  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP for all TTbar tests defined, i.e. 5 TimeSize, 2 IgProf, 1 Callgrind, 5 Memcheck. The file /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root will be copied locally as INPUT_PILEUP_EVENTS.root and it will be used as the input file for the MixingModule pile up events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
162  OR
163  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root
164  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
165  OR
166  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
167  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once. It will also add the options "--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All" to all cmsDriver.py commands executed by the suite. In addition it will run only 2 cmsDriver.py "steps": "GEN,SIM" and "DIGI". Note the syntax GEN-SIM for combined cmsDriver.py steps)
168 
169  Legal entries for individual candles (--RunTimeSize, --RunIgProf, --RunCallgrind, --RunMemcheck options):
170  %s
171  ''' % ("\n".join(Candles)))
172 
173  parser.set_defaults(TimeSizeEvents = 0 ,
174  IgProfEvents = 0 ,
175  CallgrindEvents = 0 ,
176  MemcheckEvents = 0 ,
177  cmsScimark = 10 ,
178  cmsScimarkLarge = 10 ,
179  cmsdriverOptions = "--eventcontent FEVTDEBUGHLT", # Decided to avoid using the automatic parsing of cmsDriver_highstats_hlt.txt: cmsRelValCmd.get_cmsDriverOptions(), #Get these options automatically now!
180  #"Release Integrators" will create another file relative to the performance suite and the operators will fetch from that file the --cmsdriver option... for now just set the eventcontent since that is needed in order for things to run at all now...
181  stepOptions = "" ,
182  profilers = "" ,
183  outputdir = "" ,
184  logfile = os.path.join(os.getcwd(),"cmsPerfSuite.log"),
185  runonspare = True ,
186  bypasshlt = False ,
187  quicktest = False ,
188  unittest = False ,
189  noexec = False ,
190  dryrun = False ,
191  verbose = True ,
192  create = False ,
193  previousrel = "" ,
194  castordir = self._CASTOR_DIR,
195  cores = cmsCpuInfo.get_NumOfCores(), #Get Number of cpu cores on the machine from /proc/cpuinfo
196  cpu = "1" , #Cpu core on which the suite is run:
197  RunTimeSize = "" ,
198  RunIgProf = "" ,
199  RunCallgrind = "" ,
200  RunMemcheck = "" ,
201  RunDigiPileUp = "" ,
202  RunTimeSizePU = "" ,
203  RunIgProfPU = "" ,
204  RunCallgrindPU = "" ,
205  RunMemcheckPU = "" ,
206  PUInputFile = "" ,
207  userInputFile = "" )
208  parser.add_option('--createIgVol', action="store_true", dest='create',
209  help = 'Create IgProf AFS volume for the release and architecture')
210  parser.add_option('-q', '--quiet' , action="store_false", dest='verbose' ,
211  help = 'Output less information' )
212  parser.add_option('-b', '--bypass-hlt' , action="store_true" , dest='bypasshlt' ,
213  help = 'Bypass HLT root file as input to RAW2DIGI')
214  parser.add_option('-n', '--notrunspare', action="store_false", dest='runonspare',
215  help = 'Do not run cmsScimark on spare cores')
216  parser.add_option('-t', '--timesize' , type='int' , dest='TimeSizeEvents' , metavar='<#EVENTS>' ,
217  help = 'specify the number of events for the TimeSize tests' )
218  parser.add_option('-i', '--igprof' , type='int' , dest='IgProfEvents' , metavar='<#EVENTS>' ,
219  help = 'specify the number of events for the IgProf tests' )
220  parser.add_option('-c', '--callgrind' , type='int' , dest='CallgrindEvents' , metavar='<#EVENTS>' ,
221  help = 'specify the number of events for the Callgrind tests' )
222  parser.add_option('-m', '--memcheck' , type='int' , dest='MemcheckEvents' , metavar='<#EVENTS>' ,
223  help = 'specify the number of events for the Memcheck tests' )
224  parser.add_option('--cmsScimark' , type='int' , dest='cmsScimark' , metavar='' ,
225  help = 'specify the number of times the cmsScimark benchmark is run before and after the performance suite on cpu1')
226  parser.add_option('--cmsScimarkLarge' , type='int' , dest='cmsScimarkLarge' , metavar='' ,
227  help = 'specify the number of times the cmsScimarkLarge benchmark is run before and after the performance suite on cpu1')
228  parser.add_option('--cores' , type='int', dest='cores' , metavar='<CORES>' ,
229  help = 'specify the number of cores of the machine (can be used with 0 to stop cmsScimark from running on the other cores)')
230  parser.add_option('--cmsdriver' , type='string', dest='cmsdriverOptions', metavar='<OPTION_STR>',
231  help = 'specify special options to use with the cmsDriver.py commands (designed for integration build use)')
232  parser.add_option('-a', '--archive' , type='string', dest='castordir' , metavar='<DIR>' ,
233  help = 'specify the wanted CASTOR directory where to store the results tarball')
234  parser.add_option('-L', '--logfile' , type='string', dest='logfile' , metavar='<FILE>' ,
235  help = 'file to store log output of the script')
236  parser.add_option('-o', '--output' , type='string', dest='outputdir' , metavar='<DIR>' ,
237  help = 'specify the directory where to store the output of the script')
238  parser.add_option('-r', '--prevrel' , type='string', dest='previousrel' , metavar='<DIR>' ,
239  help = 'Top level dir of previous release for regression analysis')
240  parser.add_option('--step' , type='string', dest='stepOptions' , metavar='<STEPS>' ,
241  help = 'specify the processing steps intended (instead of the default ones)' )
242  parser.add_option('--cpu' , type='string', dest='cpu' , metavar='<CPU>' ,
243  help = 'specify the core on which to run the performance suite')
244 
245  #Adding new options to put everything configurable at command line:
246  parser.add_option('--RunTimeSize' , type='string', dest='RunTimeSize' , metavar='<CANDLES>' ,
247  help = 'specify on which candles to run the TimeSize tests')
248  parser.add_option('--RunIgProf' , type='string', dest='RunIgProf' , metavar='<CANDLES>' ,
249  help = 'specify on which candles to run the IgProf tests')
250  parser.add_option('--RunCallgrind' , type='string', dest='RunCallgrind' , metavar='<CANDLES>' ,
251  help = 'specify on which candles to run the Callgrind tests')
252  parser.add_option('--RunMemcheck' , type='string', dest='RunMemcheck' , metavar='<CANDLES>' ,
253  help = 'specify on which candles to run the Memcheck tests')
254  parser.add_option('--RunDigiPileUp' , type='string', dest='RunDigiPileUp' , metavar='<CANDLES>' ,
255  help = 'specify the candle on which to run DIGI PILE UP and repeat all the tests set to run on that candle with PILE UP')
256  parser.add_option('--PUInputFile' , type='string', dest='PUInputFile' , metavar='<FILE>' ,
257  help = 'specify the root file to pick the pile-up events from')
258  parser.add_option('--RunTimeSizePU' , type='string', dest='RunTimeSizePU' , metavar='<CANDLES>' ,
259  help = 'specify on which candles to run the TimeSize tests with PILE UP')
260  parser.add_option('--RunIgProfPU' , type='string', dest='RunIgProfPU' , metavar='<CANDLES>' ,
261  help = 'specify on which candles to run the IgProf tests with PILE UP')
262  parser.add_option('--RunCallgrindPU' , type='string', dest='RunCallgrindPU' , metavar='<CANDLES>' ,
263  help = 'specify on which candles to run the Callgrind tests with PILE UP')
264  parser.add_option('--RunMemcheckPU' , type='string', dest='RunMemcheckPU' , metavar='<CANDLES>' ,
265  help = 'specify on which candles to run the Memcheck tests with PILE UP')
266 
267  #Adding a filein option to use pre-processed RAW file for RECO and HLT:
268  parser.add_option('--filein' , type='string', dest='userInputFile' , metavar='<FILE>', #default="",
269  help = 'specify input RAW root file for HLT and RAW2DIGI-RECO (list the files in the same order as the candles for the tests)')
270 
271  #Adding an option to handle additional (to the default user) email addresses to the email notification list (that sends the cmsPerfSuite.log once the performance suite is done running):
272  parser.add_option('--mail', type='string', dest='MailLogRecipients', metavar='<EMAIL ADDRESS>', default=self.user, help='specify valid email address(es) name@domain in order to receive notification at the end of the performance suite running with the cmsPerfSuite.log file')
273 
274  #Adding option to turn off tarball creation at the end of the execution of the performance suite:
275  parser.add_option('--no_tarball', action="store_false", dest='tarball', default=True, help='Turn off automatic tarball creation at the end of the performance suite execution')
276 
277  #####################
278  #
279  # Developer options
280  #
281 
282  devel = opt.OptionGroup(parser, "Developer Options",
283  "Caution: use these options at your own risk."
284  "It is believed that some of them bite.\n")
285 
286  devel.add_option('-p', '--profile' , type="str" , dest='profilers', metavar="<PROFILERS>" ,
287  help = 'Profile codes to use for cmsRelvalInput' )
288  devel.add_option('-f', '--false-run', action="store_true", dest='dryrun' ,
289  help = 'Dry run' )
290  devel.add_option('-d', '--debug' , action='store_true', dest='debug' ,
291  help = 'Debug' )
292  devel.add_option('--quicktest' , action="store_true", dest='quicktest',
293  help = 'Quickly overwrite all the defaults with small numbers so that we can run a quick test of our choosing.' )
294  devel.add_option('--test' , action="store_true", dest='unittest' ,
295  help = 'Perform a simple test, overrides other options. Overrides verbosity and sets it to false.' )
296  devel.add_option('--no_exec' , action="store_true", dest='noexec' ,
297  help = 'Run the suite without executing the cmsRelvalreport.py commands in the various directories. This is a useful debugging tool.' )
298  parser.add_option_group(devel)
299  (options, args) = parser.parse_args(argslist)
300 
301 
302  self._debug = options.debug
303  self._unittest = options.unittest
304  self._noexec = options.noexec
305  self._verbose = options.verbose
306  self._dryrun = options.dryrun
307  create = options.create
308  castordir = options.castordir
309  TimeSizeEvents = options.TimeSizeEvents
310  IgProfEvents = options.IgProfEvents
311  CallgrindEvents = options.CallgrindEvents
312  MemcheckEvents = options.MemcheckEvents
313  cmsScimark = options.cmsScimark
314  cmsScimarkLarge = options.cmsScimarkLarge
315  cmsdriverOptions = options.cmsdriverOptions
316  stepOptions = options.stepOptions
317  quicktest = options.quicktest
318  #candleoption = options.candleOptions
319  runonspare = options.runonspare
320  profilers = options.profilers.strip()
321  cpu = options.cpu.strip()
322  bypasshlt = options.bypasshlt
323  cores = options.cores
324  logfile = options.logfile
325  prevrel = options.previousrel
326  outputdir = options.outputdir
327  RunTimeSize = options.RunTimeSize
328  RunIgProf = options.RunIgProf
329  RunCallgrind = options.RunCallgrind
330  RunMemcheck = options.RunMemcheck
331  RunDigiPileUp = options.RunDigiPileUp
332  RunTimeSizePU = options.RunTimeSizePU
333  RunIgProfPU = options.RunIgProfPU
334  RunCallgrindPU = options.RunCallgrindPU
335  RunMemcheckPU = options.RunMemcheckPU
336  PUInputFile = options.PUInputFile
337  userInputFile = options.userInputFile
338  if options.MailLogRecipients !="" and self.user not in options.MailLogRecipients: #To allow for the --mail "" case of suppressing the email and the default user case
339  MailLogRecipients= self.user+","+options.MailLogRecipients #Add the user by default if there is a mail report
340  else:
341  MailLogRecipients=options.MailLogRecipients
342  tarball = options.tarball
343 
344  #################
345  # Check logfile option
346  #
347  if not logfile == None:
348  logfile = os.path.abspath(logfile)
349  logdir = os.path.dirname(logfile)
350  if not os.path.exists(logdir):
351  parser.error("Directory to output logfile does not exist")
352  sys.exit()
353  logfile = os.path.abspath(logfile)
354 
355  #############
356  # Check step Options
357  #
358  if "GEN,SIM" in stepOptions:
359  self.logh.write("WARNING: Please use GEN-SIM with a hypen not a \",\"!\n")
360  #Using the step option as a switch between different dictionaries for:
361  #RunTimeSize,RunIgProf,RunCallgrind,RunMemCheck,RunDigiPileUp:
362  if stepOptions == "" or stepOptions == 'Default':
363  pass
364  else:
365  stepOptions='--usersteps=%s' % (stepOptions)
366 
367  ###############
368  # Check profile option
369  #
370  isnumreg = re.compile("^-?[0-9]*$")
371  found = isnumreg.search(profilers)
372  if not found :
373  parser.error("profile codes option contains non-numbers")
374  sys.exit()
375 
376  ###############
377  # Check output directory option
378  #
379  if outputdir == "":
380  outputdir = os.getcwd()
381  else:
382  outputdir = os.path.abspath(outputdir)
383 
384  if not os.path.isdir(outputdir):
385  parser.error("%s is not a valid output directory" % outputdir)
386  sys.exit()
387 
388  ################
389  # Check cpu option
390  #
391  numetcomreg = re.compile("^[0-9,]*")
392  if not numetcomreg.search(cpu):
393  parser.error("cpu option needs to be a comma separted list of ints or a single int")
394  sys.exit()
395 
396  cpustr = cpu
397  cpu = []
398  if "," in cpustr:
399  cpu = [int(x) for x in cpustr.split(",")] #Build a list (not a map iterator) so it can be reused
400  else:
401  cpu = [ int(cpustr) ]
402 
403  ################
404  # Check previous release directory
405  #
406  if not prevrel == "":
407  prevrel = os.path.abspath(prevrel)
408  if not os.path.exists(prevrel):
409  self.logh.write("ERROR: Previous release dir %s could not be found" % prevrel)
410  sys.exit()
411 
412  #############
413  # Setup quicktest option
414  #
415  if quicktest:
416  TimeSizeEvents = 1
417  IgProfEvents = 1
418  CallgrindEvents = 0
419  MemcheckEvents = 0
420  cmsScimark = 1
421  cmsScimarkLarge = 1
422 
423  #############
424  # Setup unit test option
425  #
426  if self._unittest:
427  self._verbose = False
428  if stepOptions == "":
429  stepOptions = "GEN-SIM,DIGI,L1,DIGI2RAW,HLT,RAW2DIGI-RECO"
430  cmsScimark = 0
431  cmsScimarkLarge = 0
432  CallgrindEvents = 0
433  MemcheckEvents = 0
434  IgProfEvents = 0
435  TimeSizeEvents = 1
436 
437  #Split all the RunTimeSize etc candles in lists:
438  TimeSizeCandles=[]
439  IgProfCandles=[]
440  CallgrindCandles=[]
441  MemcheckCandles=[]
442  TimeSizePUCandles=[]
443  IgProfPUCandles=[]
444  CallgrindPUCandles=[]
445  MemcheckPUCandles=[]
446  userInputRootFiles=[]
447  if RunTimeSize:
448  TimeSizeCandles = RunTimeSize.split(",")
449  if RunIgProf:
450  IgProfCandles = RunIgProf.split(",")
451  if RunCallgrind:
452  CallgrindCandles = RunCallgrind.split(",")
453  if RunMemcheck:
454  MemcheckCandles = RunMemcheck.split(",")
455  if RunDigiPileUp:
456  for candle in RunDigiPileUp.split(","):
457  if candle in TimeSizeCandles:
458  TimeSizePUCandles.append(candle)
459  if candle in IgProfCandles:
460  IgProfPUCandles.append(candle)
461  if candle in CallgrindCandles:
462  CallgrindPUCandles.append(candle)
463  if candle in MemcheckCandles:
464  MemcheckPUCandles.append(candle)
465  if RunTimeSizePU:
466  TimeSizePUCandles.extend(RunTimeSizePU.split(","))
467  #Some smart removal of duplicates from the list!
468  temp=set(TimeSizePUCandles)
469  TimeSizePUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
470  if RunIgProfPU:
471  IgProfPUCandles.extend(RunIgProfPU.split(","))
472  #Some smart removal of duplicates from the list!
473  temp=set(IgProfPUCandles)
474  IgProfPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
475  if RunCallgrindPU:
476  CallgrindPUCandles.extend(RunCallgrindPU.split(","))
477  #Some smart removal of duplicates from the list!
478  temp=set(CallgrindPUCandles)
479  CallgrindPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
480  if RunMemcheckPU:
481  MemcheckPUCandles.extend(RunMemcheckPU.split(","))
482  #Some smart removal of duplicates from the list!
483  temp=set(MemcheckPUCandles)
484  MemcheckPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
485  if userInputFile:
486  userInputRootFiles=userInputFile.split(",")
487 
488 
489 
490  #############
491  # Setup cmsdriver and eventual cmsdriverPUoption
492  #
493  cmsdriverPUOptions=""
494  if cmsdriverOptions:
495  #Set the eventual Pile Up cmsdriver options first:
496  if TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles:
497  #Bug fixed: no space between --pileup= and LowLumiPileUp (otherwise could omit the =)
498  cmsdriverPUOptions = '--cmsdriver="%s %s%s"'%(cmsdriverOptions," --pileup=",cmsDriverPileUpOption)
499  #Set the regular ones too:
500  cmsdriverOptions = '--cmsdriver="%s"'%cmsdriverOptions
501 
502  return (create ,
503  castordir ,
504  TimeSizeEvents ,
505  IgProfEvents ,
506  CallgrindEvents ,
507  MemcheckEvents ,
508  cmsScimark ,
509  cmsScimarkLarge ,
510  cmsdriverOptions,
511  cmsdriverPUOptions,
512  stepOptions ,
513  quicktest ,
514  profilers ,
515  cpu ,
516  cores ,
517  prevrel ,
518  bypasshlt ,
519  runonspare ,
520  outputdir ,
521  logfile ,
522  TimeSizeCandles ,
523  IgProfCandles ,
524  CallgrindCandles,
525  MemcheckCandles ,
526  TimeSizePUCandles ,
527  IgProfPUCandles ,
528  CallgrindPUCandles,
529  MemcheckPUCandles ,
530  PUInputFile ,
531  userInputRootFiles,
532  MailLogRecipients,
533  tarball)
534 
535  #def usage(self):
536  # return __doc__
537 
538  ############
539  # Run a list of commands using system
540  # ! We should rewrite this not to use system (in most cases it is unnecessary)
541  def runCmdSet(self,cmd):
542  exitstat = 0
543  if len(cmd) <= 1:
544  exitstat = self.runcmd(cmd)
545  if self._verbose:
546  self.printFlush(cmd)
547  else:
548  for subcmd in cmd:
549  if self._verbose:
550  self.printFlush(subcmd)
551  exitstat = self.runcmd(" && ".join(cmd))
552  if self._verbose:
553  self.printFlush(self.getDate())
554  return exitstat
555 
556  #############
557  # Print and flush a string (for output to a log file)
558  #
559  def printFlush(self,command):
560  if self._verbose:
561  self.logh.write(str(command) + "\n")
562  self.logh.flush()
563 
564  #############
565  # Run a command and return the exit status
566  #
567  def runcmd(self,command):
568  #Substitute popen with subprocess.Popen!
569  #Using try/except until Popen becomes thread safe (it seems that every time it is called
570  #all processes are checked to reap the ones that are done; this creates a race condition with the wait()... that
571  #results in an error with "No child process").
572  #os.popen(command)
573  try:
574  process = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
575  pid=process.pid
576  exitstat= process.wait()
577  cmdout = process.stdout.read()
578  exitstat = process.returncode
579  except OSError as detail:
580  self.logh.write("Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s).Assume it failed!\n %s\n"%(command,pid,detail))
581  self.logh.flush()
582  exitstat=999
583  cmdout="Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s).Assume it failed!\n %s"%(command,pid,detail)
584  if self._verbose:
585  self.logh.write(cmdout)# + "\n") No need of extra \n!
586  self.logh.flush()
587  if exitstat == None:
588  self.logh.write("Something strange is going on! Exit code was None for command %s: check if it really ran!"%command)
589  self.logh.flush()
590  exitstat=0
591  return exitstat
592 
593  def getDate(self):
594  return time.ctime()
595 
596  def printDate(self):
597  self.logh.write(self.getDate() + "\n")
598  self.logh.flush()
599  #############
600  # Make directory for a particular candle and profiler.
601  # ! This is really unnecessary code and should be replaced with an os.mkdir() call
602  def mkCandleDir(self,pfdir,candle,profiler):
603  adir = os.path.join(pfdir,"%s_%s" % (candle,profiler))
604  self.runcmd( "mkdir -p %s" % adir )
605  if self._verbose:
606  self.printDate()
607  return adir
608 
609  #############
610  # Copy root file from another candle's directory
611  # ! Again this is messy.
612 
613  def cprootfile(self,dir,candle,NumOfEvents,cmsdriverOptions=""):
614  cmds = ("cd %s" % dir,
615  "cp -pR ../%s_IgProf/%s_GEN,SIM.root ." % (candle,CandFname[candle]))
616 
617  if self.runCmdSet(cmds):
618  self.logh.write("Since there was no ../%s_IgProf/%s_GEN,SIM.root file it will be generated first\n"%(candle,CandFname[candle]))
619 
620  cmd = "cd %s ; cmsDriver.py %s -s GEN,SIM -n %s --fileout %s_GEN,SIM.root %s>& %s_GEN_SIM_for_valgrind.log" % (dir,KeywordToCfi[candle],str(NumOfEvents),candle,cmsdriverOptions,candle)
621 
622  self.printFlush(cmd)
623  #Obsolete popen4-> subprocess.Popen
624  #cmdout=os.popen3(cmd)[2].read()
625  cmdout=subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
626  if cmdout:
627  self.printFlush(cmdout)
628  return cmdout
629 
630  #############
631  # Display G4 cerr errors and CMSExceptions in the logfile
632  #
633  def displayErrors(self,file):
634  try:
635  for line in open(file,"r"):
636  if "cerr" in line or "CMSException" in line:
637  self.logh.write("ERROR: %s\n" % line)
638  self.ERRORS += 1
639  except OSError as detail:
640  self.logh.write("WARNING: %s\n" % detail)
641  self.ERRORS += 1
642  except IOError as detail:
643  self.logh.write("WARNING: %s\n" % detail)
644  self.ERRORS += 1
645 
646  ##############
647  # Filter lines in the valgrind report that match GEN,SIM
648  #
649  def valFilterReport(self,dir):
650  #cmds = ("cd %s" % dir,
651  # "grep -v \"step=GEN,SIM\" SimulationCandles_%s.txt > tmp" % (self.cmssw_version),
652  # "mv tmp SimulationCandles_%s.txt" % (self.cmssw_version))
653  #FIXME:
654  #Quick and dirty hack to have valgrind MemCheck run on 5 events on both GEN,SIM and DIGI in QCD_80_120, while removing the line for GEN,SIM for Callgrind
655  InputFileName=os.path.join(dir,"SimulationCandles_%s.txt"%(self.cmssw_version))
656  InputFile=open(InputFileName,"r")
657  InputLines=InputFile.readlines()
658  InputFile.close()
659  Outputfile=open(InputFileName,"w")
660  simRegxp=re.compile("step=GEN,SIM")
661  digiRegxp=re.compile("step=DIGI")
662  CallgrindRegxp=re.compile("ValgrindFCE")
663  MemcheckRegxp=re.compile("Memcheck")
664  NumEvtRegxp=re.compile("-n 1")#FIXME Either use the ValgrindEventNumber or do a more general match!
665  for line in InputLines:
666  if simRegxp.search(line) and CallgrindRegxp.search(line):
667  continue
668  elif simRegxp.search(line) and MemcheckRegxp.search(line):
669  #Modify
670  if NumEvtRegxp.search(line):
671  line=NumEvtRegxp.sub(r"-n 5",line)
672  else:
673  self.logh.write("The number of Memcheck event was not changed since the original number of Callgrind event was not 1!\n")
674  Outputfile.write(line)
675  elif digiRegxp.search(line) and MemcheckRegxp.search(line):
676  #Modify
677  if NumEvtRegxp.search(line):
678  line=NumEvtRegxp.sub(r"-n 5",line)
679  else:
680  self.logh.write("The number of Memcheck event was not changed since the original number of Callgrind event was not 1!\n")
681  Outputfile.write(line)
682  else:
683  Outputfile.write(line)
684  self.logh.flush()
685  Outputfile.close()
686 
687  #self.runCmdSet(cmds)
688 
689  ##################
690  # Run cmsScimark benchmarks a number of times
691  #
692  def benchmarks(self,cpu,pfdir,name,bencher,large=False):
693  cmd = self.Commands[cpu][3]
694  redirect = ""
695  if large:
696  redirect = " -large >>"
697  else:
698  redirect = " >>"
699 
700  for i in range(bencher):
701  #Check first for the existence of the file so that we can append:
702  if not os.path.exists(os.path.join(pfdir,os.path.basename(name))):
703  #Equivalent of touch to make sure the file exists so that we can append to it.
704  open(os.path.join(pfdir,os.path.basename(name)),"w").close()
705 
706  command= cmd + redirect + os.path.join(pfdir,os.path.basename(name))
707  self.printFlush(command + " [%s/%s]" % (i+1,bencher))
708  self.runcmd(command)
709  self.logh.flush()
710 
711  ##################
712  # This function is a wrapper around cmsRelvalreport
713  #
714  def runCmsReport(self,cpu,dir,candle):
715  cmd = self.Commands[cpu][1]
716  cmds = ("cd %s" % (dir),
717  "%s -i SimulationCandles_%s.txt -t perfreport_tmp -R -P >& %s.log" % (cmd,self.cmssw_version,candle))
718  exitstat = 0
719  if not self._debug:
720  exitstat = self.runCmdSet(cmds)
721 
722  if self._unittest and (not exitstat == 0):
723  self.logh.write("ERROR: CMS Report returned a non-zero exit status \n")
724  sys.exit(exitstat)
725  else:
726  return(exitstat) #To return the exit code of the cmsRelvalreport.py commands to the runPerfSuite function
727 
728  ##################
729  # Test cmsDriver.py (parses the simcandles file, removing duplicate lines, and runs the cmsDriver part)
730  #
731  def testCmsDriver(self,cpu,dir,cmsver,candle):
732  cmsdrvreg = re.compile("^cmsDriver.py")
733  cmd = self.Commands[cpu][0]
734  noExit = True
735  stepreg = re.compile("--step=([^ ]*)")
736  previousCmdOnline = ""
737  for line in open(os.path.join(dir,"SimulationCandles_%s.txt" % (cmsver))):
738  if (not line.lstrip().startswith("#")) and not (line.isspace() or len(line) == 0):
739  cmdonline = line.split("@@@",1)[0]
740  if cmsdrvreg.search(cmdonline) and not previousCmdOnline == cmdonline:
741  stepbeingrun = "Unknown"
742  matches = stepreg.search(cmdonline)
743  if not matches == None:
744  stepbeingrun = matches.groups()[0]
745  if "PILEUP" in cmdonline:
746  stepbeingrun += "_PILEUP"
747  self.logh.write(cmdonline + "\n")
748  cmds = ("cd %s" % (dir),
749  "%s >& ../cmsdriver_unit_test_%s_%s.log" % (cmdonline,candle,stepbeingrun))
750  if self._dryrun:
751  self.logh.write(str(cmds) + "\n")
752  else:
753  out = self.runCmdSet(cmds)
754  if not out == None:
755  sig = out >> 16 # Get the top 16 bits
756  xstatus = out & 0xffff # Mask out all bits except the first 16
757  self.logh.write("FATAL ERROR: CMS Driver returned a non-zero exit status (which is %s) when running %s for candle %s. Signal interrupt was %s\n" % (xstatus,stepbeingrun,candle,sig))
758  sys.exit()
759  previousCmdOnline = cmdonline
760 
761  ##############
762  # Wrapper for cmsRelvalreportInput
763  #
764  def runCmsInput(self,cpu,dir,numevents,candle,cmsdrvopts,stepopt,profiles,bypasshlt,userInputFile):
765 
766  #Crappy fix for optional options with special syntax (bypasshlt and userInputFile)
767  bypass = ""
768  if bypasshlt:
769  bypass = "--bypass-hlt"
770  userInputFileOption=""
771  if userInputFile:
772  userInputFileOption = "--filein %s"%userInputFile
773  cmd = self.Commands[cpu][2]
774  cmds=[]
775  #print cmds
776  cmds = ("cd %s" % (dir),
777  "%s %s \"%s\" %s %s %s %s %s" % (cmd,
778  numevents,
779  candle,
780  profiles,
781  cmsdrvopts,
782  stepopt,
783  bypass,userInputFileOption))
784  exitstat=0
785  exitstat = self.runCmdSet(cmds)
786  if self._unittest and (not exitstat == 0):
787  self.logh.write("ERROR: CMS Report Input returned a non-zero exit status \n" )
788  return exitstat
789  ##############
790  # Prepares the profiling directory and runs all the selected profiles (if this is not a unit test)
791  #
792  #Making parameters named to facilitate the handling of arguments (especially with the threading use case)
793  def simpleGenReport(self,cpus,perfdir=os.getcwd(),NumEvents=1,candles=['MinBias'],cmsdriverOptions='',stepOptions='',Name='',profilers='',bypasshlt='',userInputRootFiles=''):
794  callgrind = Name == "Callgrind"
795  memcheck = Name == "Memcheck"
796 
797  profCodes = {"TimeSize" : "0123",
798  "IgProf" : "4567",
799  "IgProf_Perf":"47", #Added the Analyse to IgProf_Perf #FIXME: At the moment Analyse is always run whether 7 is selected or not! Issue to solve in cmsRelvalreportInput.py... but not really important (it's always been there, not impacting our use-cases).
800  "IgProf_Mem":"567",
801  "Callgrind": "8",
802  "Memcheck" : "9",
803  None : "-1"}
804 
805  profiles = profCodes[Name]
806  if not profilers == "":
807  profiles = profilers
808 
809  RelvalreportExitCode=0
810 
811  for cpu in cpus:
812  pfdir = perfdir
813  if len(cpus) > 1:
814  pfdir = os.path.join(perfdir,"cpu_%s" % cpu)
815  for candle in candles:
816  #Create the directory for cmsRelvalreport.py running (e.g. MinBias_TimeSize, etc)
817  #Catch the case of PILE UP:
818  if "--pileup" in cmsdriverOptions:
819  candlename=candle+"_PU"
820  else:
821  candlename=candle
822  adir=self.mkCandleDir(pfdir,candlename,Name)
823  if self._unittest:
824  # Run cmsDriver.py
825  if userInputRootFiles:
826  self.logh.write(userInputRootFiles)
827  userInputFile=userInputRootFiles[0]
828  else:
829  userInputFile=""
830  self.logh.flush()
831  self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile)
832  self.testCmsDriver(cpu,adir,candle)
833  else:
834  if userInputRootFiles:
835  self.logh.write("Variable userInputRootFiles is %s\n"%userInputRootFiles)
836  #Need to use regexp, cannot rely on the order... since for different tests there are different candles...
837  #userInputFile=userInputRootFiles[candles.index(candle)]
838  #FIXME:
839  #Note the issue that the input files HAVE to have in their name the candle as is used in cmsPerfSuite.py command line!
840  #This is currently caught by a printout in the log: should be either taken care of with some exception to throw?
841  #Will put this in the documentation
842  userInputFile=""
843  candleregexp=re.compile(candle)
844  for file in userInputRootFiles:
845  if candleregexp.search(file):
846  userInputFile=file
847  self.logh.write("For these %s %s tests will use user input file %s\n"%(candlename,Name,userInputFile))
848  if userInputFile == "":
849  self.logh.write("***WARNING: For these %s %s tests could not find a matching input file in %s: will try to do without it!!!!!\n"%(candlename,Name,userInputRootFiles))
850  self.logh.flush()
851  else:
852  userInputFile=""
853  DummyTestName=candlename+"_"+stepOptions.split("=")[1]
854  DummyTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the timer (DummyTimer is just a reference; we will use the dictionary to access this later)
855  TimerInfo[Name].update({DummyTestName:DummyTimer}) #Add the TimeSize timer to the dictionary
856  #The following command will create the appropriate SimulationCandlesX.txt file in the relevant directory, ready to run cmsRelvalreport.py on it.
857  self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile)
858  #Here where the no_exec option kicks in (do everything but do not launch cmsRelvalreport.py, it also prevents cmsScimark spawning...):
859  if self._noexec:
860  self.logh.write("Running in debugging mode, without executing cmsRelvalreport.py\n")
861  self.logh.flush()
862  pass
863  else:
864  #The following command will launch cmsRelvalreport.py on the SimulationCandlesX.txt input file created above.
865  ExitCode=self.runCmsReport(cpu,adir,candle)
866  self.logh.write("Individual cmsRelvalreport.py ExitCode %s\n"%ExitCode)
867  RelvalreportExitCode=RelvalreportExitCode+ExitCode
868  self.logh.write("Summed cmsRelvalreport.py ExitCode %s\n"%RelvalreportExitCode)
869  self.logh.flush()
870  DummyTimer.set_end(datetime.datetime.now())
871 
872  #for proflog in proflogs:
873  #With the change from 2>1&|tee to >& to preserve exit codes, we need now to check all logs...
874  #less nice... we might want to do this externally so that in post-processing it's a re-usable tool
875  globpath = os.path.join(adir,"*.log") #"%s.log"%candle)
876  self.logh.write("Looking for logs that match %s\n" % globpath)
877  logs = glob.glob(globpath)
878  for log in logs:
879  self.logh.write("Found log %s\n" % log)
880  self.displayErrors(log)
881  self.printFlush("Returned cumulative RelvalreportExitCode is %s"%RelvalreportExitCode)
882  return RelvalreportExitCode
883 
884  ############
885  # Runs benchmarking, cpu spinlocks on spare cores and profiles selected candles
886  #
887  #FIXME:
888  #Could redesign interface of functions to use keyword arguments:
889  #def runPerfSuite(**opts):
890  #then instead of using castordir variable, would use opts['castordir'] etc
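As a hedged illustration of the redesign suggested in the FIXME above (not the interface actually used below, where the options remain explicit named parameters), a keyword-argument version would look roughly like this; runPerfSuiteKw and the defaults shown are hypothetical:

  def runPerfSuiteKw(self, **opts):
      # Hypothetical kwargs-style interface sketched in the FIXME above (illustrative only).
      castordir = opts.get('castordir', "/castor/cern.ch/cms/store/relval/performance/")
      TimeSizeEvents = opts.get('TimeSizeEvents', 100)
      IgProfEvents = opts.get('IgProfEvents', 5)
      # ...and so on for the remaining options, instead of a long explicit parameter list.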
891  def runPerfSuite(self,
892  create = False,
893  #Switching from CASTOR to EOS (using xrdcp instead of rfcp and root://eoscms//eos/ instead of /castor/cern.ch/)
894  #Actually not yet... for consistency we will keep it on CASTOR for now
895  #FIXME! Do the migration, following its implication in PerfDB application!
896  castordir = "/castor/cern.ch/cms/store/relval/performance/",
897  TimeSizeEvents = 100 ,
898  IgProfEvents = 5 ,
899  CallgrindEvents = 1 ,
900  MemcheckEvents = 5 ,
901  cmsScimark = 10 ,
902  cmsScimarkLarge = 10 ,
903  cmsdriverOptions = "" ,#Could use directly cmsRelValCmd.get_Options()
904  cmsdriverPUOptions= "" ,
905  stepOptions = "" ,
906  quicktest = False ,
907  profilers = "" ,
908  cpus = [1] ,
909  cpu_list = [1] ,
910  cores = 4 ,#Could use directly cmsCpuInfo.get_NumOfCores()
911  prevrel = "" ,
912  bypasshlt = False ,
913  runonspare = True ,
914  perfsuitedir = os.getcwd(),
915  logfile = None,
916  TimeSizeCandles = "" ,
917  IgProfCandles = "" ,
918  CallgrindCandles = "" ,
919  MemcheckCandles = "" ,
920  TimeSizePUCandles = "" ,
921  IgProfPUCandles = "" ,
922  CallgrindPUCandles = "" ,
923  MemcheckPUCandles = "" ,
924  PUInputFile = "" ,
925  userInputFile = "" ,
926  MailLogRecipients = "" ,
927  tarball = "" ):
928 
929  #Set up a variable for the FinalExitCode to be used as the sum of exit codes:
930  FinalExitCode=0
931 
932  #Set up the logfile first!
933  if not logfile == None:
934  try:
935  self.logh = open(logfile,"a")
936  except (OSError, IOError) as detail:
937  self.logh.write(str(detail) + "\n")
938  self.logh.flush()
939 
940  #Adding HEPSPEC06 score if available in /build/HEPSPEC06.score file
941  self.HEPSPEC06 = 0 #Set it to 0 by default (so it is easy to catch in the DB too)
942  try:
943  HEPSPEC06_file=open("/build/HEPSPEC06.score","r")
944  for line in HEPSPEC06_file.readlines():
945  if not line.startswith("#") and "HEPSPEC06" in line:
946  self.HEPSPEC06= line.split()[2]
947  except IOError:
948  self.logh.write("***Warning***: Could not find file /build/HEPSPEC06.score file on this machine!\n")
949  self.logh.flush()
950 
951  #Adding a copy of /proc/cpuinfo and /proc/meminfo in the working directory so it can be kept in the tarball on CASTOR:
952  localcpuinfo=os.path.join(perfsuitedir,"cpuinfo")
953  cpuinfo_exitcode=-1
954  if os.path.exists(localcpuinfo):
955  cpuinfo_exitcode=0
956  else:
957  self.logh.write("Copying /proc/cpuinfo in current working directory (%s)\n"%perfsuitedir)
958  cpuinfo_exitcode=self.runcmd("cp /proc/cpuinfo %s"%perfsuitedir)
959  localmeminfo=os.path.join(perfsuitedir,"meminfo")
960  meminfo_exitcode=-1
961  if os.path.exists(localmeminfo):
962  meminfo_exitcode=0
963  else:
964  self.logh.write("Copying /proc/meminfo in current working directory (%s)\n"%perfsuitedir)
965  meminfo_exitcode=self.runcmd("cp /proc/meminfo %s"%perfsuitedir)
966  if cpuinfo_exitcode or meminfo_exitcode:
967  self.logh.write("There was an issue copying the cpuinfo or meminfo files!\n")
968  self.logh.flush()
969 
970  try:
971  if not prevrel == "":
972  self.logh.write("Production of regression information has been requested with release directory %s\n" % prevrel)
973  if not cmsdriverOptions == "":
974  self.logh.write("Running cmsDriver.py with user defined options: %s\n" % cmsdriverOptions)
975  #Attach the full option syntax for cmsRelvalreportInput.py:
976  cmsdriverOptionsRelvalInput="--cmsdriver="+cmsdriverOptions
977  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
978  if not stepOptions == "":
979  self.logh.write("Running user defined steps only: %s\n" % stepOptions)
980  #Attach the full option syntax for cmsRelvalreportInput.py:
981  setpOptionsRelvalInput="--usersteps="+stepOptions
982  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
983  if bypasshlt:
984  #Attach the full option syntax for cmsRelvalreportInput.py:
985  bypasshltRelvalInput="--bypass-hlt"
986  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
987  self.logh.write("Current Architecture is %s\n"%self.cmssw_arch)
988  self.logh.write("Current CMSSW version is %s\n"%self.cmssw_version)
989  self.logh.write("This machine ( %s ) is assumed to have %s cores, and the suite will be run on cpu %s\n" %(self.host,cores,cpus))
990  self.logh.write("This machine's HEPSPEC06 score is: %s \n"%self.HEPSPEC06)
991  path=os.path.abspath(".")
992  self.logh.write("Performance Suite started running at %s on %s in directory %s, run by user %s\n" % (self.getDate(),self.host,path,self.user))
993  #Start the timer for the total performance suite running time:
994  TotalTime=PerfSuiteTimer(start=datetime.datetime.now())
995  #Also initialize the dictionary that will contain all the timing information:
996  global TimerInfo
997  TimerInfo={'TotalTime':{'TotalTime':TotalTime}} #Structure will be {'key':[PerfSuiteTimerInstance,...],...}
998  #Obsolete popen4-> subprocess.Popen
999  #showtags=os.popen4("showtags -r")[1].read()
1000  showtags=subprocess.Popen("showtags -r",shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
1001  self.logh.write(showtags) # + "\n") No need for extra \n!
1002  self.logh.flush()
1003  #For the log:
1004  if self._verbose:
1005  self.logh.write("The performance suite results tarball will be stored in CASTOR at %s\n" % self._CASTOR_DIR)
1006  self.logh.write("%s TimeSize events\n" % TimeSizeEvents)
1007  self.logh.write("%s IgProf events\n" % IgProfEvents)
1008  self.logh.write("%s Callgrind events\n" % CallgrindEvents)
1009  self.logh.write("%s Memcheck events\n" % MemcheckEvents)
1010  self.logh.write("%s cmsScimark benchmarks before starting the tests\n" % cmsScimark)
1011  self.logh.write("%s cmsScimarkLarge benchmarks before starting the tests\n" % cmsScimarkLarge)
1012  self.logh.flush()
1013  #Actual script actions!
1014  #Will have to fix the issue with the matplotlib pie-charts:
1015  #Used to source /afs/cern.ch/user/d/dpiparo/w0/perfreport2.1installation/share/perfreport/init_matplotlib.sh
1016  #Need an alternative in the release
1017 
1018  #Code for the architecture benchmarking use-case
1019  if len(cpus) > 1:
1020  for cpu in cpus:
1021  cpupath = os.path.join(perfsuitedir,"cpu_%s" % cpu)
1022  if not os.path.exists(cpupath):
1023  os.mkdir(cpupath)
1024 
1025  self.Commands = {}
1026  AllScripts = self.Scripts + self.AuxiliaryScripts
1027 
1028  for cpu in range(cmsCpuInfo.get_NumOfCores()): #FIXME use the actual number of cores of the machine here!
1029  self.Commands[cpu] = []
1030 
1031  #Information for the log:
1032  self.logh.write("Full path of all the scripts used in this run of the Performance Suite:\n")
1033  for script in AllScripts:
1034  which="which " + script
1035 
1036  #Logging the actual version of cmsDriver.py, cmsRelvalreport.py, cmsSimPyRelVal.pl
1037  #Obsolete popen4-> subprocess.Popen
1038  #whichstdout=os.popen4(which)[1].read()
1039  whichstdout=subprocess.Popen(which,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
1040  self.logh.write(whichstdout) # + "\n") No need of the extra \n!
1041  if script in self.Scripts:
1042  for cpu in range(cmsCpuInfo.get_NumOfCores()):#FIXME use the actual number of cores of the machine here!
1043  command="taskset -c %s %s" % (cpu,script)
1044  self.Commands[cpu].append(command)
1045 
1046  #First submit the cmsScimark benchmarks on the unused cores:
1047  scimark = ""
1048  scimarklarge = ""
1049  if not (self._unittest or self._noexec):
1050  if (len(cpu_list) != cores):
1051  for core in range(cores):
1052  if (not core in cpus) and runonspare:
1053  self.logh.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core) + "\n")
1054  subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (perfsuitedir, str(core))
1055  command="taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
1056  self.logh.write(command + "\n")
1057 
1058  #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
1059  #cpus so it makes no sense to try reading its stdout/err
1060  #Obsolete popen4-> subprocess.Popen
1061  #os.popen4(command)
1062  subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
1063 
1064  self.logh.flush()
1065 
1066  #Don't do benchmarking if in debug mode... saves time
1067  benching = not self._debug
1068  ##FIXME:
1069  #We may want to introduce a switch here or agree on a different default (currently 10 cmsScimark and 10 cmsScimarkLarge)
1070  if benching and not (self._unittest or self._noexec):
1071  #Submit the cmsScimark benchmarks on the cpu where the suite will be run:
1072  for cpu in cpus:
1073  scimark = open(os.path.join(perfsuitedir,"cmsScimark2.log") ,"w")
1074  scimarklarge = open(os.path.join(perfsuitedir,"cmsScimark2_large.log"),"w")
1075  if cmsScimark > 0:
1076  self.logh.write("Starting with %s cmsScimark on cpu%s\n" % (cmsScimark,cpu))
1077  cmsScimarkInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
1078  TimerInfo.update({'cmsScimarkTime':{'cmsScimarkInitial':cmsScimarkInitialTime}}) #Add the cmsScimarkInitialTime information to the general TimerInfo dictionary
1079  self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
1080  cmsScimarkInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimark initial timer
1081 
1082  if cmsScimarkLarge > 0:
1083  self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
1084  cmsScimarkLargeInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLarge PerfSuiteTimer
1085  TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeInitial':cmsScimarkLargeInitialTime}) #Add the cmsScimarkLargeInitialTime information to the general TimerInfo dictionary
1086  self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge, large=True)
1087  cmsScimarkLargeInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Initial timer
1088  self.logh.flush()
1089  #Handling the Pile up input file here:
1090  if (TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles) and not ("FASTSIM" in stepOptions):
1091  #Note the FASTSIM exclusion... since there is no need to copy the file for FASTSIM.
1092  PUInputName=os.path.join(perfsuitedir,"INPUT_PILEUP_EVENTS.root")
1093  if PUInputFile:
1094  #Define the actual command to copy the file locally:
1095  #Allow the file to be mounted locally (or accessible via AFS)
1096  copycmd="cp"
1097  #Allow the file to be on CASTOR (taking a full CASTOR path)
1098  if '/store/relval/' in PUInputFile:
1099  #Switching from CASTOR to EOS, i.e. from rfcp to xrdcp
1100  copycmd="xrdcp"
1101  #Accept plain LFNs from DBS for RelVal CASTOR files:
1102  #Minor fix to allow the case of user using the full path /castor/cern.ch/cms...
1103  if PUInputFile.startswith('/store/relval/'):
1104  #Switching to EOS from CASTOR:
1105  #PUInputFile="/castor/cern.ch/cms"+PUInputFile
1106  PUInputFile="root://eoscms//eos/cms"+PUInputFile
1107  #Copy the file locally
1108  self.logh.write("Copying the file %s locally to %s\n"%(PUInputFile,PUInputName))
1109  self.logh.flush()
1110  GetPUInput=subprocess.Popen("%s %s %s"%(copycmd,PUInputFile,PUInputName), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1111  GetPUInputExitCode=GetPUInput.wait()
1112  #Allow even the potential copy of a local file (even one already named INPUT_PILEUP_EVENTS.root!)
1113  if GetPUInputExitCode:
1114  self.logh.write("The copying of the pile-up input file returned a non-zero exit code: %s \nThis is the stdout+stderr if the command:\n%s\n"%(GetPUInputExitCode,GetPUInput.stdout))
1115  #Ultimately accept the case of the file being already there and not being specified in the --PUInputFile option
1116  if not os.path.exists(PUInputName):
1117  self.logh.write("The necessary INPUT_PILEUP_EVENTS.root file was not found in the working directory %s\nExiting now!"%perfsuitedir)
1118  self.logh.flush()
1119  sys.exit(1)
1120  else:
1121  #Set up here the DIGI PILE UP options
1122  self.printFlush("Some PILE UP tests will be run!")
1123  #Actually setting them earlier... when handling options... May not need this else after all... or just as a log entry.
1124  self.printFlush("cmsdriverPUOptions is %s"%cmsdriverPUOptions)
1125  pass
1126 
1127  #TimeSize tests:
1128  if TimeSizeEvents > 0:
1129  TimeSizeTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1130  TimerInfo.update({'TimeSize':{'TotalTime':TimeSizeTime}}) #Add the TimeSize timer to the dictionary
1131  if TimeSizeCandles:
1132  self.logh.write("Launching the TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
1133  NoPileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1134  TimerInfo['TimeSize'].update({'NoPileUpTime':NoPileUpTime}) #Add the TimeSize No Pile Up tests timer to the list
1135  self.printDate()
1136  self.logh.flush()
1137  ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizeCandles,cmsdriverOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
1138  FinalExitCode=FinalExitCode+ReportExit
1139  #Adding a time stamp here to parse for performance suite running time data
1140  self.printFlush("Regular TimeSize tests were finished at %s"%(self.getDate()))
1141  NoPileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1142 
1143  #Launch eventual Digi Pile Up TimeSize too:
1144  if TimeSizePUCandles:
1145  self.logh.write("Launching the PILE UP TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
1146  PileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1147  TimerInfo['TimeSize'].update({'PileUpTime':PileUpTime}) #Add the TimeSize Pile Up tests timer to the list
1148  self.printDate()
1149  self.logh.flush()
1150  ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizePUCandles,cmsdriverPUOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
1151  FinalExitCode=FinalExitCode+ReportExit
1152  #Adding a time stamp here to parse for performance suite running time data
1153  self.printFlush("Pileup TimeSize tests were finished at %s"%(self.getDate()))
1154  PileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1155 
1156  #Check for the case where TimeSizeEvents is set but no TimeSize candles (regular or pileup) were selected:
1157  if not (TimeSizeCandles or TimeSizePUCandles):
1158  self.printFlush("A number of events (%s) for TimeSize tests was selected, but no candle for regular or pileup tests was selected!"%(TimeSizeEvents))
1159  #Adding a time stamp here to parse for performance suite running time data
1160  self.printFlush("All TimeSize tests were finished at %s"%(self.getDate()))
1161  TimeSizeTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1162 
1163  #Stopping all cmsScimark jobs and analysing automatically the logfiles
1164  #No need to waste CPU, and this way the extra load does not affect the Valgrind measurements!
1165  if not (self._unittest or self._noexec):
1166  self.logh.write("Stopping all cmsScimark jobs now\n")
1167  subcmd = "cd %s ; %s" % (perfsuitedir,self.AuxiliaryScripts[2])
1168  stopcmd = "sh -c \"%s\"" % subcmd
1169  self.printFlush(stopcmd)
1170  #os.popen(stopcmd)
1171  #Obsolete popen4-> subprocess.Popen
1172  #self.printFlush(os.popen4(stopcmd)[1].read())
1173  self.printFlush(subprocess.Popen(stopcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read())
1174 
1175  #From here on we can use all available cores to speed up the performance suite remaining tests:
1176  if cores==0: #When specifying the cpu to run the suite on, one has to set cores to 0 to avoid threading of PerfSuite itself...
1177  #So we need to catch this case for the IB tests case where we assign the test to a specific cpu.
1178  AvailableCores=cpus
1179  elif len(cpu_list) == cores: # For the new relval case, when running all the tests on one machine,
1180  # specifying the same number of cores and cpus (like: --cores 3, --cpu 3,4,5)
1181  AvailableCores=cpus
1182  else:
1183  AvailableCores=range(cores)
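 #Illustration of the core-allocation logic above with hypothetical option values:
 # --cpu 3,4,5 --cores 0  ->  AvailableCores = cpus = [3,4,5]   (suite pinned to the given cpus, IB case)
 # --cpu 3,4,5 --cores 3  ->  AvailableCores = cpus = [3,4,5]   (len(cpu_list)==cores, relval case)
 # --cpu 1 --cores 4      ->  AvailableCores = range(4) = [0,1,2,3]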
1184 
1185  #Initialize a list that will contain all the simpleGenReport keyword arguments (1 dictionary per test):
1186  TestsToDo=[]
1187  #IgProf tests:
1188  if IgProfEvents > 0:
1189  if IgProfCandles:
1190  self.printFlush("Preparing IgProf tests")
1191  #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
1192  #So in general we want to be able to split the perf and mem tests...
1193  #For the case of the --profiler option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
1194  if profilers:
1195  self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
1196  #Prepare the simpleGenReport arguments for this test:
1197  IgProfProfilerArgs={
1198  'perfdir':perfsuitedir,
1199  'NumEvents':IgProfEvents,
1200  'candles':IgProfCandles,
1201  'cmsdriverOptions':cmsdriverOptions,
1202  'stepOptions':stepOptions,
1203  'Name':"IgProf",
1204  'profilers':profilers,
1205  'bypasshlt':bypasshlt,
1206  'userInputRootFiles':userInputFile
1207  }
1208  #Append the test to the TestsToDo list:
1209  TestsToDo.append(IgProfProfilerArgs)
1210  self.printFlush("Appended IgProf test with profiler option %s to the TestsToDo list"%profilers)
1211  #For the default case (4,5,6,7) we split the tests into 2 jobs since they naturally are 2 cmsRun jobs and for machines with many cores this will
1212  #make the performance suite run faster.
1213  else:
1214  self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
1215  ##PERF##
1216  #Prepare the simpleGenReport arguments for this test:
1217  IgProfPerfArgs={
1218  'perfdir':perfsuitedir,
1219  'NumEvents':IgProfEvents,
1220  'candles':IgProfCandles,
1221  'cmsdriverOptions':cmsdriverOptions,
1222  'stepOptions':stepOptions,
1223  'Name':"IgProf_Perf",
1224  'profilers':profilers,
1225  'bypasshlt':bypasshlt,
1226  'userInputRootFiles':userInputFile
1227  }
1228  #Append the test to the TestsToDo list:
1229  TestsToDo.append(IgProfPerfArgs)
1230  self.printFlush("Appended IgProf PERF test to the TestsToDo list")
1231  ##MEM##
1232  #Prepare the simpleGenReport arguments for this test:
1233  IgProfMemArgs={
1234  'perfdir':perfsuitedir,
1235  'NumEvents':IgProfEvents,
1236  'candles':IgProfCandles,
1237  'cmsdriverOptions':cmsdriverOptions,
1238  'stepOptions':stepOptions,
1239  'Name':"IgProf_Mem",
1240  'profilers':profilers,
1241  'bypasshlt':bypasshlt,
1242  'userInputRootFiles':userInputFile
1243  }
1244  #Append the test to the TestsToDo list:
1245  TestsToDo.append(IgProfMemArgs)
1246  self.printFlush("Appended IgProf MEM test to the TestsToDo list")
1247  #The following will be handled in the while loop that handles the starting of the threads:
1248  #ReportExit=self.simpleGenReport(cpus,perfsuitedir,IgProfEvents,IgProfCandles,cmsdriverOptions,stepOptions,"IgProf",profilers,bypasshlt,userInputFile)
1249  #FinalExitCode=FinalExitCode+ReportExit
1250  #Launch any Digi Pile Up IgProf tests too:
1251  if IgProfPUCandles:
1252  self.printFlush("Preparing IgProf PileUp tests")
1253  #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
1254  #So in general we want to be able to split the perf and mem tests...
1255  #For the case of the --profiler option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
1256  if profilers:
1257  self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
1258  #Prepare the simpleGenReport arguments for this test:
1259  IgProfProfilerPUArgs={
1260  'perfdir':perfsuitedir,
1261  'NumEvents':IgProfEvents,
1262  'candles':IgProfPUCandles,
1263  'cmsdriverOptions':cmsdriverPUOptions,
1264  'stepOptions':stepOptions,
1265  'Name':"IgProf",
1266  'profilers':profilers,
1267  'bypasshlt':bypasshlt,
1268  'userInputRootFiles':userInputFile
1269  }
1270  #Append the test to the TestsToDo list:
1271  TestsToDo.append(IgProfProfilerPUArgs)
1272  self.printFlush("Appended IgProf PileUp test with profiler option %s to the TestsToDo list"%profilers)
1273  else:
1274  self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
1275  ##PERF##
1276  #Prepare the simpleGenReport arguments for this test:
1277  IgProfPerfPUArgs={
1278  'perfdir':perfsuitedir,
1279  'NumEvents':IgProfEvents,
1280  'candles':IgProfPUCandles,
1281  'cmsdriverOptions':cmsdriverPUOptions,
1282  'stepOptions':stepOptions,
1283  'Name':"IgProf_Perf",
1284  'profilers':profilers,
1285  'bypasshlt':bypasshlt,
1286  'userInputRootFiles':userInputFile
1287  }
1288  #Append the test to the TestsToDo list:
1289  TestsToDo.append(IgProfPerfPUArgs)
1290  self.printFlush("Appended IgProf PERF PileUp test to the TestsToDo list")
1291  ##MEM##
1292  #Prepare the simpleGenReport arguments for this test:
1293  IgProfMemPUArgs={
1294  'perfdir':perfsuitedir,
1295  'NumEvents':IgProfEvents,
1296  'candles':IgProfPUCandles,
1297  'cmsdriverOptions':cmsdriverPUOptions,
1298  'stepOptions':stepOptions,
1299  'Name':"IgProf_Mem",
1300  'profilers':profilers,
1301  'bypasshlt':bypasshlt,
1302  'userInputRootFiles':userInputFile
1303  }
1304  #Append the test to the TestsToDo list:
1305  TestsToDo.append(IgProfMemPUArgs)
1306  self.printFlush("Appended IgProf MEM PileUp test to the TestsToDo list")
1307  if not (IgProfCandles or IgProfPUCandles):
1308  self.printFlush("A number of events (%s) for IgProf tests was selected, but no candle for regular or pileup tests was selected!"%(IgProfEvents))
1309 
1310 
1311  #Callgrind (Valgrind) tests:
1312  if CallgrindEvents > 0:
1313  if CallgrindCandles:
1314  self.printFlush("Preparing Callgrind tests")
1315  CallgrindArgs={
1316  'perfdir':perfsuitedir,
1317  'NumEvents':CallgrindEvents,
1318  'candles':CallgrindCandles,
1319  'cmsdriverOptions':cmsdriverOptions,
1320  'stepOptions':stepOptions,
1321  'Name':"Callgrind",
1322  'profilers':profilers,
1323  'bypasshlt':bypasshlt,
1324  'userInputRootFiles':userInputFile
1325  }
1326  #Append the test to the TestsToDo list:
1327  TestsToDo.append(CallgrindArgs)
1328  self.printFlush("Appended Callgrind test to the TestsToDo list")
1329  #Launch any Digi Pile Up Callgrind tests too:
1330  if CallgrindPUCandles:
1331  self.printFlush("Preparing Callgrind PileUp tests")
1332  CallgrindPUArgs={
1333  'perfdir':perfsuitedir,
1334  'NumEvents':CallgrindEvents,
1335  'candles':CallgrindPUCandles,
1336  'cmsdriverOptions':cmsdriverPUOptions,
1337  'stepOptions':stepOptions,
1338  'Name':"Callgrind",
1339  'profilers':profilers,
1340  'bypasshlt':bypasshlt,
1341  'userInputRootFiles':userInputFile
1342  }
1343  #Append the test to the TestsToDo list:
1344  TestsToDo.append(CallgrindPUArgs)
1345  self.printFlush("Appended Callgrind PileUp test to the TestsToDo list")
1346  if not (CallgrindCandles or CallgrindPUCandles):
1347  self.printFlush("A number of events (%s) for Callgrind tests was selected, but no candle for regular or pileup tests was selected!"%(CallgrindEvents))
1348 
1349  if MemcheckEvents > 0:
1350  if MemcheckCandles:
1351  self.printFlush("Preparing Memcheck tests")
1352  MemcheckArgs={
1353  'perfdir':perfsuitedir,
1354  'NumEvents':MemcheckEvents,
1355  'candles':MemcheckCandles,
1356  'cmsdriverOptions':cmsdriverOptions,
1357  'stepOptions':stepOptions,
1358  'Name':"Memcheck",
1359  'profilers':profilers,
1360  'bypasshlt':bypasshlt,
1361  'userInputRootFiles':userInputFile
1362  }
1363  #Append the test to the TestsToDo list:
1364  TestsToDo.append(MemcheckArgs)
1365  self.printFlush("Appended Memcheck test to the TestsToDo list")
1366  #Launch any Digi Pile Up Memcheck tests too:
1367  if MemcheckPUCandles:
1368  self.printFlush("Preparing Memcheck PileUp tests")
1369  MemcheckPUArgs={
1370  'perfdir':perfsuitedir,
1371  'NumEvents':MemcheckEvents,
1372  'candles':MemcheckPUCandles,
1373  'cmsdriverOptions':cmsdriverPUOptions,
1374  'stepOptions':stepOptions,
1375  'Name':"Memcheck",
1376  'profilers':profilers,
1377  'bypasshlt':bypasshlt,
1378  'userInputRootFiles':userInputFile
1379  }
1380  #Append the test to the TestsToDo list:
1381  TestsToDo.append(MemcheckPUArgs)
1382  self.printFlush("Appended Memcheck PileUp test to the TestsToDo list")
1383  if not (MemcheckCandles or MemcheckPUCandles):
1384  self.printFlush("A number of events (%s) for Memcheck tests was selected, but no candle for regular or pileup tests was selected!"%(MemcheckEvents))
1385 
1386  #If there are any IgProf, Callgrind or Memcheck events to be run,
1387  #run the (infinite) loop that submits the test threads on the available cores:
1388  if IgProfEvents or CallgrindEvents or MemcheckEvents:
1389  #FIXME: We should consider what behavior makes most sense in case we use the --cores option; at this time only the cores=0 case is considered...
1390  self.printFlush("Threading all remaining tests on all %s available cores!"%len(AvailableCores))
1391  self.printDate()
1392  self.logh.flush()
1393  #Save the original AvailableCores list to use it as a test to break the infinite loop:
1394  #While in the regular RelVal use-case it makes sense to use the actual number of cores of the machines, in
1395  #the IB case the AvailableCores will always consist of only 1 single core..
1396  OriginalAvailableCores=list(AvailableCores) #Tricky list copy bug! Without the list() OriginalAvailableCores would just point to (alias) AvailableCores!
1397  #Print this out in the log for debugging reasons
1398  self.printFlush("Original available cores list: %s"%AvailableCores)
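 #A small sketch of the list-aliasing pitfall mentioned above (illustrative values only):
 # a = [0,1]; b = a;       a.pop()  ->  b == [0]    (b is the same list object as a)
 # a = [0,1]; b = list(a); a.pop()  ->  b == [0,1]  (b is an independent copy)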
1399 
1400  #Create a dictionary to keep track of running threads on the various cores:
1401  activePerfTestThreads={}
1402  #Flag for waiting messages:
1403  Waiting=False
1404  while 1:
1405  #Check if there are tests to run:
1406  if TestsToDo:
1407  #Using the Waiting flag to avoid writing this message every 5 seconds in the case
1408  #of having more tests to do than available cores...
1409  if not Waiting:
1410  self.printFlush("Currently %s tests are scheduled to be run:"%len(TestsToDo))
1411  self.printFlush(TestsToDo)
1412  #Check the available cores:
1413  if AvailableCores:
1414  #Set waiting flag to False since we'll be doing something
1415  Waiting=False
1416  self.printFlush("There is/are %s core(s) available"%len(AvailableCores))
1417  cpu=AvailableCores.pop()
1418  self.printFlush("Let's use cpu %s"%cpu)
1419  simpleGenReportArgs=TestsToDo.pop()
1420  self.printFlush("Let's submit %s test on core %s"%(simpleGenReportArgs['Name'],cpu))
1421  #Adding a Total timer for each of the threaded tests:
1422  if simpleGenReportArgs['Name'] not in TimerInfo.keys():
1423  #if 'TotalTime' not in TimerInfo[simpleGenReportArgs['Name']].keys():
1424  self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the total timer for this test type
1425  TimerInfo.update({simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add this test's total timer to the dictionary
1426  threadToDo=self.simpleGenReportThread(cpu,self,**simpleGenReportArgs) #Need to send self too, so that the thread has access to the PerfSuite.simpleGenReport() function
1427  self.printFlush("Starting thread %s"%threadToDo)
1428  threadToDo.start() #Note that Thread.start() returns None, so no exit code can be collected here
1429  self.printFlush("Adding thread %s to the list of active threads"%threadToDo)
1430  activePerfTestThreads[cpu]=threadToDo
1431  #If there is no available core, pass, there will be some checking of activeThreads, a little sleep and then another check.
1432  else:
1433  pass
1434  #Check the activePerfTestThreads:
1435  activeTestNames=[]
1436  activeTestNamesPU=[]
1437  for cpu in activePerfTestThreads.keys():
1438  if activePerfTestThreads[cpu].isAlive():
1439  #print "%% cpu %s activerPerfTestThreads[cpu] %s activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'] %s"%(cpu,activePerfTestThreads[cpu],activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'])
1440  if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
1441  activeTestNamesPU.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1442  else:
1443  activeTestNames.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1444  pass
1445  elif cpu not in AvailableCores:
1446  #Set waiting flag to False since we'll be doing something
1447  Waiting=False
1448  self.printFlush(time.ctime())
1449  self.printFlush("%s test, in thread %s is done running on core %s"%(activePerfTestThreads[cpu].simpleGenReportArgs['Name'],activePerfTestThreads[cpu],cpu) )
1450  self.printFlush("About to append cpu %s to AvailableCores list"%cpu)
1451  AvailableCores.append(cpu)
1452  #Eliminate from activeTestNames lists:
1453  #print activeTestNames
1454  #print activeTestNamesPU
1455  #print activePerfTestThreads[cpu].simpleGenReportArgs['Name']
1456  if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
1457  try:
1458  activeTestNamesPU.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1459  except:
1460  pass
1461  else:
1462  try:
1463  activeTestNames.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1464  except:
1465  pass
1466  #Eliminate also from the activePerfTestThreads dictionary:
1467  activePerfTestThreads.pop(cpu)
1468  #FIXME:
1469  #Delicate check to stop the timer on the individual threaded test!
1470  #Need to think about it still...
1471  #FIXME:
1472  #Delicate check to stop the timers on the threaded tests:
1473  #Check activePerfTestThreads dictionary for "Name" if any name is missing, the total can be stopped for that name.
1474  #self.PerfTestTotalTimer
1475  for TestName in ["IgProf","IgProf_Perf","IgProf_Mem","Callgrind","Memcheck"]: #These are the possible 'Name' keys used for the threaded tests
1476  if (TestName not in activeTestNames) and (TestName not in activeTestNamesPU) :
1477  try:
1478  TimerInfo[TestName]['TotalTime'].set_end(datetime.datetime.now())
1479  except:
1480  #print "No %s test was running"%TestName
1481  pass
1482  #Potentially buggy if statement: it seems we don't wait for the running threads to be finished...
1483  #We should require that:
1484  #-All OriginalAvailableCores are actually available.
1485  if not AvailableCores==[] and (set(AvailableCores)==set(range(cmsCpuInfo.get_NumOfCores())) or set(AvailableCores)==set(OriginalAvailableCores)) and not TestsToDo:
1486  self.printFlush("PHEW! We're done... all TestsToDo are done... at %s "%(self.getDate()))
1487  #Debug printouts:
1488  #print "AvailableCores",AvailableCores
1489  #print "set(AvailableCores)",set(AvailableCores)
1490  #print "set(range(cmsCpuInfo.get_NumOfCores())",set(range(cmsCpuInfo.get_NumOfCores()))
1491  #print "OriginalAvailableCores",OriginalAvailableCores
1492  #print "set(OriginalAvailableCores)",set(OriginalAvailableCores)
1493  #print "TestsToDo",TestsToDo
1494  break
1495  else:
1496  #Putting the sleep statement first to avoid writing Waiting... before the output of the started thread reaches the log...
1497  time.sleep(5)
1498  #Use the Waiting flag to write only one waiting message and avoid repeating it every 5 seconds...
1499  if not Waiting:
1500  self.printFlush(time.ctime())
1501  self.printFlush("Waiting for tests to be done...")
1502  sys.stdout.flush()
1503  Waiting=True
1504  #End of the if for IgProf, Callgrind, Memcheck tests
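 #A condensed sketch of the scheduling loop above (comments only, not executed):
 # while True:
 #     if TestsToDo and AvailableCores: pop a core, pop a test, start a simpleGenReportThread on it
 #     for each core with a registered thread: if the thread is no longer alive, give the core back
 #     if all original cores are free and TestsToDo is empty: break
 #     otherwise sleep 5 seconds and check again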
1505 
1506  if benching and not (self._unittest or self._noexec):
1507  #Ending the performance suite with the cmsScimark benchmarks again:
1508  for cpu in cpus:
1509  if cmsScimark > 0:
1510  self.logh.write("Ending with %s cmsScimark on cpu%s\n" % (cmsScimark,cpu))
1511  cmsScimarkFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
1512  TimerInfo['cmsScimarkTime'].update({'cmsScimarkFinal':cmsScimarkFinalTime}) #Add the cmsScimarkFinalTime information to the general TimerInfo dictionary
1513 
1514  self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
1515  cmsScimarkFinalTime.set_end(datetime.datetime.now()) #Stop the final cmsScimark timer
1516  if cmsScimarkLarge > 0:
1517  self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
1518  cmsScimarkLargeFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLarge PerfSuiteTimer
1519  TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeFinal':cmsScimarkLargeFinalTime}) #Add the cmsScimarkLargeFinalTime information to the general TimerInfo dictionary
1520  self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge,large=True)
1521  cmsScimarkLargeFinalTime.set_end(datetime.datetime.now()) #Stop the final cmsScimarkLarge timer
1522 
1523  if prevrel:
1524  self.logh.write("Running the regression analysis with respect to %s\n"%getVerFromLog(prevrel))
1525  self.logh.write(time.ctime(time.time()))
1526  self.logh.flush()
1527 
1528  crr.regressReports(prevrel,os.path.abspath(perfsuitedir),oldRelName = getVerFromLog(prevrel),newRelName=self.cmssw_version)
1529 
1530  #Create a tarball of the work directory
1531  if tarball:
1532  tarballTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the tarball PerfSuiteTimer
1533  TimerInfo.update({'tarballTime':{'TotalTime':tarballTime}})
1534  # Adding str(stepOptions) to distinguish the tarballs for one release
1535  # (GEN->DIGI, L1->RECO will be run in parallel)
1536 
1537  # Cleaning the stepOptions from the --usersteps=:
1538  if "=" in str(stepOptions):
1539  fileStepOption=str(stepOptions).split("=")[1]
1540  else:
1541  fileStepOption=str(stepOptions)
1542  if fileStepOption=="":
1543  fileStepOption="UnknownStep"
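 #Illustration of the cleanup above with hypothetical values:
 # stepOptions = "--usersteps=GEN-SIM,DIGI"  ->  fileStepOption = "GEN-SIM,DIGI"
 # stepOptions = ""                          ->  fileStepOption = "UnknownStep"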
1544  # Add the working directory used to avoid overwriting castor files (also put a check...)
1545  fileWorkingDir=os.path.basename(perfsuitedir)
1546 
1547  # Also add the --conditions and --eventcontent options used in the --cmsdriver options since it
1548  # is possible that the same tests will be run with different conditions and/or event content:
1549  # Parse it out of --cmsdriver option:
1550  fileEventContentOption="UnknownEventContent"
1551  fileConditionsOption="UnknownConditions"
1552  for token in cmsdriverOptions.split("--"):
1553  if token!='' and 'cmsdriver' not in token:
1554  if "=" in token:
1555  fileOption=token.split("=")[0]
1556  fileOptionValue=token.split("=")[1].strip("'").strip('"')
1557  else:
1558  fileOption=token.split()[0]
1559  fileOptionValue=token.split()[1].strip("'").strip('"')
1560  if "eventcontent" in fileOption or "conditions" in fileOption:
1561  if "eventcontent" in fileOption:
1562  fileEventContentOption=fileOptionValue
1563  elif "conditions" in fileOption:
1564  # check if we are using the autoCond style of flexible conditions
1565  # if so, expand the condition here so that the file names contain the real conditions
1566  if "auto:" in fileOptionValue:
1567  from Configuration.AlCa.autoCond import autoCond
1568  fileConditionsOption = autoCond[ fileOptionValue.split(':')[1] ]
1569  else:
1570  # "old style" conditions, hardcoded values ...
1571  # FIXME:
1572  # Should put at least the convention in cmsPerfCommons to know how to parse it...
1573  # Potential weak point if the conditions tag convention changes...
1574  if "," in fileOptionValue: #Since 330, conditions don't have FrontierConditions_GlobalTag, in front of them anymore...
1575  fileConditionsOption=fileOptionValue.split("::")[0].split(",")[1] #"Backward" compatibility
1576  else:
1577  fileConditionsOption=fileOptionValue.split("::")[0]
1578  else: # empty token
1579  #print "Print this is the token: %s"%token
1580  pass
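 #Illustration of the parsing above with hypothetical cmsDriver option values:
 # --eventcontent FEVTDEBUGHLT                               ->  fileEventContentOption = "FEVTDEBUGHLT"
 # --conditions auto:mc                                      ->  fileConditionsOption = autoCond["mc"] (expanded tag)
 # --conditions FrontierConditions_GlobalTag,MC_31X_V9::All  ->  fileConditionsOption = "MC_31X_V9"
 # --conditions MC_31X_V9::All                               ->  fileConditionsOption = "MC_31X_V9"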
1581 
1582  #self.printFlush("Conditions label to add to the tarball name is %s"%fileConditionsOption)
1583  #self.printFlush("Eventcontent label to add to the tarball name is %s"%fileEventContentOption)
1584  #FIXME:
1585  #Could add the allowed event contents in the cmsPerfCommons.py file and use those to match in the command line options... This assumes maintenance of cmsPerfCommons.py
1586 
1587 
1588  #Create a tarball with just the logfiles
1589  subprocess.Popen("ls -R | grep .root > rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1590  LogFile = "%s_%s_%s_%s_%s_%s_%s_%s_log.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
1591  AbsTarFileLOG = os.path.join(perfsuitedir,LogFile)
1592  tarcmd = "tar zcfX %s %s %s" %(AbsTarFileLOG, "rootFiles", os.path.join(perfsuitedir,"*"))
1593  self.printFlush("Creating a tarball for the logfiles")
1594  self.printFlush(tarcmd)
1595  self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1596  self.printFlush(subprocess.Popen("rm rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1597 
1598  fullcastorpathlog=os.path.join(castordir,LogFile)
1599 
1600 
1601  #Create the tarball with the contents of the directory + md5 checksum
1602  TarFile = "%s_%s_%s_%s_%s_%s_%s_%s.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
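 #A hypothetical example of the resulting tarball name (all field values are made up):
 # slc5_amd64_gcc434_CMSSW_3_11_0_GEN-SIM,DIGI_MC_31X_V9_FEVTDEBUGHLT_wkdir1_lxbuild001_relval.tgz
 #with the corresponding ..._log.tgz built above from the same fields.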
1603  AbsTarFile = os.path.join(perfsuitedir,TarFile)
1604  tarcmd = "tar -zcf %s %s" %(AbsTarFile, os.path.join(perfsuitedir,"*"))
1605  md5cmd = "md5sum %s" %(AbsTarFile)
1606  self.printFlush("Creating a tarball with the content of the directory")
1607  self.printFlush(tarcmd)
1608  self.printFlush(md5cmd)
1609  #FIXME:
1610  #Anything that will be logged after the tar command below will not enter the cmsPerfSuite.log in the tarball (by definition)...
1611  #To remain backward compatible the harvesting script needs to be based on the command above to identify the tarball location.
1612  #Obsolete popen4-> subprocess.Popen
1613  #self.printFlush(os.popen3(tarcmd)[2].read()) #Using popen3 to get only stderr we don't want the whole stdout of tar!
1614  self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1615  md5sum = subprocess.Popen(md5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read().split()[0]
1616  self.printFlush("The md5 checksum of the tarball: %s" %(md5sum))
1617  AbsTarFileMD5 = AbsTarFile + ".md5"
1618  md5filecmd = "echo %s > %s" % (md5sum, AbsTarFileMD5)
1619  self.printFlush(subprocess.Popen(md5filecmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1620 
1621  #Archive it on CASTOR
1622  #Before archiving, check if the file already exists on CASTOR: if it does, print a message but do not overwrite it, and do not delete the local copy:
1623  fullcastorpathfile=os.path.join(castordir,TarFile)
1624  fullcastorpathmd5=os.path.join(castordir,TarFile + ".md5")
1625 
1626  checkcastor="nsls %s" % fullcastorpathfile
1627  #Obsolete os.popen-> subprocess.Popen
1628  #checkcastorout=os.popen3(checkcastor)[1].read()
1629  checkcastorout=subprocess.Popen(checkcastor,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read()
1630  if checkcastorout.rstrip()==fullcastorpathfile:
1631  castorcmdstderr="File %s is already on CASTOR! Will NOT OVERWRITE!!!"%fullcastorpathfile
1632  else:
1633  #Switching from CASTOR TO EOS, i.e. rfcp to xrdcp!
1634  #Not YET!!!
1635  #FIXME! Migrate to EOS eventually, taking into account implications for PerfDB logs linking!
1636  castorcmd="rfcp %s %s" % (AbsTarFile,fullcastorpathfile)
1637  castormd5cmd="rfcp %s %s" % (AbsTarFileMD5,fullcastorpathmd5)
1638  castorlogcmd="rfcp %s %s" % (AbsTarFileLOG,fullcastorpathlog)
1639  self.printFlush(castorcmd)
1640  self.printFlush(castormd5cmd)
1641  self.printFlush(castorlogcmd)
1642  #Obsolete os.popen-> subprocess.Popen
1643  #castorcmdstderr=os.popen3(castorcmd)[2].read()
1644  castorcmdstderr=subprocess.Popen(castorcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1645  subprocess.Popen(castormd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1646  subprocess.Popen(castorlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1647  #Checking the stderr of the rfcp command to copy the tarball (.tgz) on CASTOR:
1648  if castorcmdstderr:
1649  #If it failed print the stderr message to the log and tell the user the tarball (.tgz) is kept in the working directory
1650  self.printFlush(castorcmdstderr)
1651  self.printFlush("Since the CASTOR archiving for the tarball failed the file %s is kept in directory %s"%(TarFile, perfsuitedir))
1652  else:
1653  #If it was successful then remove the tarball from the working directory:
1654  self.printFlush("Successfully archived the tarball %s in CASTOR!"%(TarFile))
1655  self.printFlush("The tarball can be found: %s"%(fullcastorpathfile))
1656  self.printFlush("The logfile can be found: %s"%(fullcastorpathlog))
1657  self.printFlush("Deleting the local copy of the tarballs")
1658  rmtarballcmd="rm -Rf %s"%(AbsTarFile)
1659  rmtarballmd5cmd="rm -Rf %s"%(AbsTarFileMD5)
1660  rmtarballlogcmd="rm -Rf %s"%(AbsTarFileLOG)
1661  self.printFlush(rmtarballcmd)
1662  self.printFlush(rmtarballmd5cmd)
1663  self.printFlush(rmtarballlogcmd)
1664  #Obsolete os.popen-> subprocess.Popen
1665  #self.printFlush(os.popen4(rmtarballcmd)[1].read())
1666  self.printFlush(subprocess.Popen(rmtarballcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1667  self.printFlush(subprocess.Popen(rmtarballmd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1668  self.printFlush(subprocess.Popen(rmtarballlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1669  tarballTime.set_end(datetime.datetime.now())
1670  else:
1671  self.printFlush("Performance Suite directory will not be archived in a tarball since --no_tarball option was chosen")
1672 
1673  #End of script actions!
1674 
1675  #Print a time stamp at the end:
1676  date=time.ctime(time.time())
1677  self.logh.write("Performance Suite finished running at %s on %s in directory %s\n" % (date,self.host,path))
1678  if self.ERRORS == 0:
1679  self.logh.write("There were no errors detected in any of the log files!\n")
1680  else:
1681  self.logh.write("ERROR: There were %s errors detected in the log files, please revise!\n" % self.ERRORS)
1682  #print "No exit code test"
1683  #sys.exit(1)
1684  except exceptions.Exception as detail:
1685  self.logh.write(str(detail) + "\n")
1686  self.logh.flush()
1687  if not self.logh.isatty():
1688  self.logh.close()
1689  raise
1690  #Optionally send the execution logfile by email to the user and anyone else interested:
1691  if MailLogRecipients != "": #Basically leave the option to turn it off too.. --mail ""
1692  self.printFlush("Sending email notification for this execution of the performance suite with command:")
1693  sendLogByMailcmd='cat cmsPerfSuite.log |mail -s "Performance Suite finished running on %s" '%self.host + MailLogRecipients
1694  self.printFlush(sendLogByMailcmd)
1695  #Obsolete os.popen-> subprocess.Popen
1696  #self.printFlush(os.popen4(sendLogByMailcmd)[1].read())
1697  self.printFlush(subprocess.Popen(sendLogByMailcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1698  else:
1699  self.printFlush('No email notification will be sent for this execution of the performance suite since option --mail "" was used')
1700 
1701  TotalTime.set_end(datetime.datetime.now())
1702  self.printFlush("Total Running Time\t%s hrs (%s mins)"%(TotalTime.get_duration()['hours'],TotalTime.get_duration()['minutes']))
1703 
1704  #Dump of the TimerInfo information
1705  #First dump it as a pickle file...
1706  #In order to do so without the complication of serializing a custom class instance, the dictionary needs to be made fully string-based:
1707  TimerInfoStr={}
1708  PerfSuiteTimerInfo=open("PerfSuiteTimerInfo.pkl","wb")
1709  #pickle.dump(TimerInfo,PerfSuiteTimerInfo)
1710  #PerfSuiteTimerInfo.close()
1711  #For now print it at the bottom of the log:
1712  self.logh.write("Test type\tActual Test\tDuration\tStart Time\tEnd Time\n")
1713  for key in TimerInfo.keys():
1714  #self.printFlush(key)
1715  TimerInfoStr.update({key:{}})
1716  for test in TimerInfo[key].keys():
1717  TimerInfoStr[key].update({test:[str(TimerInfo[key][test].get_duration()['hours'])+" hrs ("+str(TimerInfo[key][test].get_duration()['minutes'])+" mins)",TimerInfo[key][test].get_start(),TimerInfo[key][test].get_end()]})
1718  self.logh.write(key+"\t"+test+"\t")
1719  self.logh.write("%s hrs (%s mins)\t"%(TimerInfo[key][test].get_duration()['hours'],TimerInfo[key][test].get_duration()['minutes']))
1720  self.logh.write("%s\t"%TimerInfo[key][test].get_start())
1721  self.logh.write("%s\n"%TimerInfo[key][test].get_end())
1722  pickle.dump(TimerInfoStr,PerfSuiteTimerInfo)
1723  PerfSuiteTimerInfo.close()
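 #A minimal sketch of how the pickled timing summary could be read back offline (assumes the pkl file
 #is in the current directory; the field layout follows TimerInfoStr built above):
 # import pickle
 # info = pickle.load(open("PerfSuiteTimerInfo.pkl","rb"))
 # for testtype, tests in info.items():
 #     for testname, (duration, start, end) in tests.items():
 #         print testtype, testname, duration, start, end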
1724 
1725  self.logh.write("Final Performance Suite exit code was %s\n"%FinalExitCode)
1726  self.logh.flush()
1727  sys.exit(FinalExitCode)
1728 
1729 def main(argv=[__name__]): #argv is a list of arguments.
1730  #Valid ways to call main with arguments:
1731  #main(["--cmsScimark",10])
1732  #main(["-t100"]) #With the caveat that the options.timeSize will be of type string... so should avoid using this!
1733  #main(["--timeSize",100])
1734  #Invalid ways:
1735  #main(["One string with all options"])
1736 
1737  #Let's instantiate the class:
1738  suite=PerfSuite()
1739 
1740  #print suite
1741  #Uncomment this for tests with main() in interactive python:
1742  #print suite.optionParse(argv)
1743 
1744  PerfSuiteArgs={}
1745  (PerfSuiteArgs['create'],
1746  PerfSuiteArgs['castordir'],
1747  PerfSuiteArgs['TimeSizeEvents'],
1748  PerfSuiteArgs['IgProfEvents'],
1749  PerfSuiteArgs['CallgrindEvents'],
1750  PerfSuiteArgs['MemcheckEvents'],
1751  PerfSuiteArgs['cmsScimark'],
1752  PerfSuiteArgs['cmsScimarkLarge'],
1753  PerfSuiteArgs['cmsdriverOptions'],
1754  PerfSuiteArgs['cmsdriverPUOptions'],
1755  PerfSuiteArgs['stepOptions'],
1756  PerfSuiteArgs['quicktest'],
1757  PerfSuiteArgs['profilers'],
1758  PerfSuiteArgs['cpus'],
1759  PerfSuiteArgs['cores'],
1760  PerfSuiteArgs['prevrel'],
1761  PerfSuiteArgs['bypasshlt'],
1762  PerfSuiteArgs['runonspare'],
1763  PerfSuiteArgs['perfsuitedir'],
1764  PerfSuiteArgs['logfile'],
1765  PerfSuiteArgs['TimeSizeCandles'],
1766  PerfSuiteArgs['IgProfCandles'],
1767  PerfSuiteArgs['CallgrindCandles'],
1768  PerfSuiteArgs['MemcheckCandles'],
1769  PerfSuiteArgs['TimeSizePUCandles'],
1770  PerfSuiteArgs['IgProfPUCandles'],
1771  PerfSuiteArgs['CallgrindPUCandles'],
1772  PerfSuiteArgs['MemcheckPUCandles'],
1773  PerfSuiteArgs['PUInputFile'],
1774  PerfSuiteArgs['userInputFile'],
1775  PerfSuiteArgs['MailLogRecipients'],
1776  PerfSuiteArgs['tarball']
1777  ) = suite.optionParse(argv)
1778 
1779  if PerfSuiteArgs['create']: # Before anything, request the AFS volume (it takes some time...)
1780  suite.createIgVolume()
1781 
1782  if PerfSuiteArgs['logfile'] is not None:
1783  if os.path.exists(PerfSuiteArgs['logfile']):
1784  oldlogfile=PerfSuiteArgs['logfile']+"_"+time.strftime("%d-%m-%Y_%H:%M:%S")
1785  #Move old logfile to a file with the same filename plus a timestamp appended
1786  mvOldLogfile=subprocess.Popen("mv %s %s"%(PerfSuiteArgs['logfile'],oldlogfile), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1787  mvOldLogfileExitCode=mvOldLogfile.wait()
1788  #Finally open the logfile and put the information above in it:
1789  try:
1790  ActualLogfile = open(PerfSuiteArgs['logfile'],"w")
1791  if mvOldLogfileExitCode:
1792  ActualLogfile.write("Please check what happened: A file named %s existed already and the attempt to move it to %s produced the following output: %s\n"%(PerfSuiteArgs['logfile'],oldlogfile,mvOldLogfile.stdout))
1793  else:
1794  ActualLogfile.write("***WARNING! A file named %s existed already!\n***It has been moved to %s before starting the current logfile!\n"%(PerfSuiteArgs['logfile'],oldlogfile))
1795  except (OSError, IOError) as detail:
1796  sys.stderr.write("Failed to open the intended logfile %s, detail error:\n%s\n"%(PerfSuiteArgs['logfile'],detail)) #Cannot use ActualLogfile here since opening it just failed
1797 
1798  else:
1799  try:
1800  ActualLogfile = open(PerfSuiteArgs['logfile'],"w")
1801  except (OSError, IOError) as detail:
1802  sys.stderr.write("Failed to open the intended logfile %s, detail error:\n%s\n"%(PerfSuiteArgs['logfile'],detail)) #Cannot use ActualLogfile here since opening it just failed
1803  ActualLogfile.flush()
1804 
1805  #Three lines to add the exact command line used to call the performance suite directly in the log.
1806  ActualLogfile.write("Performance suite invoked with command line:\n")
1807  cmdline=reduce(lambda x,y:x+" "+y,sys.argv)
1808  ActualLogfile.write(cmdline+"\n")
1809  ActualLogfile.flush()
1810 
1811  #Debug printout that we could silence...
1812  ActualLogfile.write("Initial PerfSuite Arguments:\n")
1813  for key in PerfSuiteArgs.keys():
1814  ActualLogfile.write("%s %s\n"%(key,PerfSuiteArgs[key]))
1815  ActualLogfile.flush()
1816  #print PerfSuiteArgs
1817 
1818  PerfSuiteArgs['cpu_list'] = PerfSuiteArgs['cpus'] #To access the actual number of cpus used inside the threads..
1819 
1820  #Handle in here the case of multiple cores and the loading of cores with cmsScimark:
1821  if len(PerfSuiteArgs['cpus']) > 1:
1822  ActualLogfile.write("More than 1 cpu: threading the Performance Suite!\n")
1823  outputdir=PerfSuiteArgs['perfsuitedir']
1824  runonspare=PerfSuiteArgs['runonspare'] #Save the original value of runonspare for cmsScimark stuff
1825  cpus=PerfSuiteArgs['cpus']
1826  cores=PerfSuiteArgs['cores']
1827  if runonspare:
1828  for core in range(PerfSuiteArgs['cores']):
1829  cmsScimarkLaunch_pslist={}
1830  if len(cpus) != cores: #In this case (relval), don't load the other cores with cmsScimark
1831  if (core not in cpus):
1832  #self.logh.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core) + "\n")
1833  ActualLogfile.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core)+"\n")
1834  subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (outputdir, str(core))
1835  command="taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
1836  #self.logh.write(command + "\n")
1837  ActualLogfile.write(command+"\n")
1838  #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
1839  #cpus so it makes no sense to try reading its stdout/err
1840  cmsScimarkLaunch_pslist[core]=subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
1841  ActualLogfile.write("Spawned %s with PID %s\n"%(command,cmsScimarkLaunch_pslist[core].pid))
1842  ActualLogfile.flush()
1843  PerfSuiteArgs['runonspare']=False #Set it to false to avoid cmsScimark being spawned by each thread
1844  logfile=PerfSuiteArgs['logfile']
1845  suitethread={}
1846  for cpu in cpus:
1847  #Make arguments "threaded" by setting for each instance of the suite:
1848  #1-A different output (sub)directory
1849  #2-Only 1 core on which to run
1850  #3-Automatically have a logfile... otherwise stdout is lost?
1851  #To be done:[3-A flag for Valgrind not to "thread" itself onto the other cores..]
1852  cpudir = os.path.join(outputdir,"cpu_%s" % cpu)
1853  if not os.path.exists(cpudir):
1854  os.mkdir(cpudir)
1855  PerfSuiteArgs['perfsuitedir']=cpudir
1856  PerfSuiteArgs['cpus']=[cpu] #Keeping the name cpus for now FIXME: change it to cpu in the whole code
1857  if PerfSuiteArgs['logfile']:
1858  PerfSuiteArgs['logfile']=os.path.join(cpudir,os.path.basename(PerfSuiteArgs['logfile']))
1859  else:
1860  PerfSuiteArgs['logfile']=os.path.join(cpudir,"cmsPerfSuiteThread.log")
1861  #Now spawn the thread with:
1862  suitethread[cpu]=PerfThread(**PerfSuiteArgs)
1863  #ActualLogfile.write(suitethread[cpu])
1864  ActualLogfile.write("Launching PerfSuite thread on cpu%s\n"%cpu)
1865  ActualLogfile.flush()
1866  #print "With arguments:"
1867  #print PerfSuiteArgs
1868  suitethread[cpu].start()
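 #A hypothetical example of the per-cpu layout created by the loop above for --cpu 3,4 (names are illustrative):
 # <outputdir>/cpu_3/cmsPerfSuiteThread.log   (or the basename of --logfile, if one was given)
 # <outputdir>/cpu_4/cmsPerfSuiteThread.log
 #with one threaded instance of the performance suite running in each directory.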
1869 
1870  while reduce(lambda x,y: x or y, map(lambda x: x.isAlive(),suitethread.values())):
1871  try:
1872  time.sleep(5.0)
1873  sys.stdout.flush()
1874  except (KeyboardInterrupt, SystemExit):
1875  raise
1876  ActualLogfile.write("All PerfSuite threads have completed!\n")
1877  ActualLogfile.flush()
1878 
1879  else: #No threading, just run the performance suite on the cpu core selected
1880  suite.runPerfSuite(**PerfSuiteArgs)
1881 
1882 if __name__ == "__main__":
1883 
1884  main(sys.argv)