
cmsPerfSuite.py
1 #!/usr/bin/env python
2 from builtins import range
3 import os, time, sys, re, glob, exceptions
4 import optparse as opt
5 import cmsRelRegress as crr
6 from cmsPerfCommons import Candles, KeywordToCfi, CandFname, cmsDriverPileUpOption, getVerFromLog
7 import cmsRelValCmd,cmsCpuInfo
8 import threading #Needed in threading use for Valgrind
9 import subprocess #Nicer subprocess management than os.popen
10 import datetime #Used to time the running of the performance suite
11 import pickle #Used to dump the running timing information
12 from functools import reduce
13 
14 #Redefine _cleanup() function not to poll active processes
15 #[This is necessary to avoid issues when threading]
16 #So let's have it do nothing:
17 def _cleanup():
18  pass
19 #Override the function in subprocess
20 subprocess._cleanup=_cleanup
21 
22 class PerfThread(threading.Thread):
23  def __init__(self,**args):
24  self.args=args
25  threading.Thread.__init__(self)
26  def run(self):
27  self.suite = PerfSuite()
28  #print "Arguments inside the thread instance:"
29  #print type(self.args)
30  #print self.args
31  self.suite.runPerfSuite(**(self.args))#self.args)
32 
33 class PerfSuiteTimer:
34  """A class defining timing objects to time the running of the various parts of the performance suite. The class depends on module datetime."""
35  def __init__(self,start=None):
36  """Initialize the start time and set the end time to some indefinite time in the future"""
37  self.start = start
38  self.end = datetime.datetime.max
39  self.duration = self.start - self.end
40 
41  #Setters:
42  def set_start(self,start=None):
43  self.start = start
44  def set_end(self,end=None):
45  #print "Setting end time to %s"%end.ctime()
46  self.end = end
47  self.duration = self.end - self.start
48  #Getters
49  def get_start(self):
50  """Return the start time in ctime timestamp format"""
51  return self.start.ctime()
52  def get_end(self):
53  """Return the end time in ctime timestamp format"""
54  return self.end.ctime()
55  def get_duration(self):
56  """Return the duration between start and end as a dictionary with keys 'hours', 'minutes', 'seconds' to express the total duration in the favourite (most appropriate) unit. The function returns truncated integers."""
57  self.duration_seconds = self.duration.days*86400 + self.duration.seconds
58  self.duration_hours = self.duration_seconds//3600
59  self.duration_minutes = self.duration_seconds//60
60  return {'hours':self.duration_hours, 'minutes':self.duration_minutes, 'seconds':self.duration_seconds}
61 
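# --- Illustrative sketch (editor's addition, not part of the original script): how the
# --- suite typically drives a PerfSuiteTimer. The helper name is hypothetical.
def _example_time_one_test():
    timer = PerfSuiteTimer(start=datetime.datetime.now())  # start the clock
    # ... the test would run here ...
    timer.set_end(datetime.datetime.now())                 # stop the clock
    # get_duration() returns truncated integer totals, e.g. a 754 s run gives
    # {'hours': 0, 'minutes': 12, 'seconds': 754}
    return timer.get_duration()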
62 class PerfSuite:
63  def __init__(self):
64 
65  self.ERRORS = 0
66  #Switching from CASTOR to EOS (using xrdcp instead of rfcp and root://eoscms//eos/ instead of /castor/cern.ch/)
67  #NOT YET!
68  #FIXME... do the migration to EOS eventually, taking care of PerfDB implications for tarball location!
69  self._CASTOR_DIR = "/castor/cern.ch/cms/store/relval/performance/"
70  self._dryrun = False
71  self._debug = False
72  self._unittest = False
73  self._noexec = False
74  self._verbose = True
75  self.logh = sys.stdout
76 
77  #Get some environment variables to use
78  try:
79  self.cmssw_arch = os.environ["SCRAM_ARCH"]
80  self.cmssw_version= os.environ["CMSSW_VERSION"]
81  self.host = os.environ["HOST"]
82  self.user = os.environ["USER"]
83  except KeyError:
84  self.logh.write('Error: one of the environment variables SCRAM_ARCH, CMSSW_VERSION, HOST or USER is not available.\n')
85  self.logh.write(' Please run eval `scramv1 runtime -csh` to set your environment variables\n')
86  self.logh.flush()
87  sys.exit()
88 
89  #Scripts used by the suite:
90  self.Scripts =["cmsDriver.py","cmsRelvalreport.py","cmsRelvalreportInput.py","cmsScimark2"]
91  self.AuxiliaryScripts=["cmsScimarkLaunch.csh","cmsScimarkParser.py","cmsScimarkStop.py"]
92 
93 
94  #Threading the execution of IgProf, Memcheck and Callgrind using the same model used to thread the whole performance suite:
95  #1-Define a class simpleGenReportThread() that has relevant methods needed to handle PerfTest()
96  #2-Instantiate one with the necessary arguments to run simpleGenReport on core N
97  #3-Execute its "run" method by starting the thread
98  #Simplest way maybe is to keep 2 global lists:
99  #AvailableCores
100  #TestsToDo
101  #PerfSuite will fill the TestsToDo list with dictionaries, to be used as keyword arguments to instantiate a relevant thread.
102  #Once all the TestsToDo are "scheduled" into the list (FirstInLastOut buffer since we use pop()) PerfSuite will look into the
103  #AvailableCores list and start popping cores onto which to instantiate the relevant threads, then it will start the thread,
104  #appending it to the activePerfTestThread{}, a dictionary with core as key and thread object as value, to facilitate bookkeeping.
105  #An infinite loop will take care of checking for AvailableCores as long as there are TestsToDo and keep submitting.
106  #In the same loop the activePerfTestThread{} will be checked for finished threads and it will re-append the relevant cpu back
107  #to the AvailableCores list.
108  #In the same loop a check for the case of all cores being back into AvailableCores with no more TestsToDo will break the infinite loop
109  #and declare the end of all tests. Otherwise, a 5-second sleep delays the next iteration of the loop.
110 
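# --- Illustrative sketch (editor's addition, not part of the original script): a minimal,
# --- self-contained model of the core/test scheduling loop described in the comments above.
# --- TestsToDo holds keyword-argument dictionaries, AvailableCores holds free core numbers,
# --- and activePerfTestThread maps core -> running thread; make_thread stands in for
# --- instantiating a simpleGenReportThread. All names here are for illustration only.
def _example_schedule_tests(TestsToDo, AvailableCores, make_thread):
    import time
    activePerfTestThread = {}
    while True:
        # Submit work while there are tests left and cores free:
        while TestsToDo and AvailableCores:
            core = AvailableCores.pop()
            thread = make_thread(core, **TestsToDo.pop())
            thread.start()
            activePerfTestThread[core] = thread
        # Reap finished threads and hand their core back:
        for core, thread in list(activePerfTestThread.items()):
            if not thread.is_alive():
                AvailableCores.append(core)
                del activePerfTestThread[core]
        # All cores free and nothing left to do -> all tests are done:
        if not TestsToDo and not activePerfTestThread:
            break
        time.sleep(5)  # otherwise wait 5 seconds before checking again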
111  def createIgVolume(self):
112  igcommand = '/afs/cern.ch/cms/sdt/internal/scripts/requestPerfIgprofSpace.py --version ' + self.cmssw_version + ' --platform ' + self.cmssw_arch
113  subprocess.Popen(igcommand,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
114 
115 
116  class simpleGenReportThread(threading.Thread):
117  def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs): #Passing around the perfsuite object to be able to access simpleGenReport
118  self.cpu=cpu
119  self.simpleGenReportArgs=simpleGenReportArgs
120  self.perfsuiteinstance=perfsuiteinstance
121  threading.Thread.__init__(self)
122  def run(self):
123  self.PerfTest=self.perfsuiteinstance.PerfTest(self.cpu,self.perfsuiteinstance,**(self.simpleGenReportArgs))
124  self.PerfTest.runPerfTest()
125 
126  class PerfTest:
127  def __init__(self,cpu,perfsuiteinstance,**simpleGenReportArgs):
128  self.cpu=cpu
129  self.simpleGenReportArgs=simpleGenReportArgs
130  self.perfsuiteinstance=perfsuiteinstance
131  def runPerfTest(self):
132 # self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
133 # TimerInfo.update({self.simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add the TimeSize timer to the dictionary
134  if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
135  self.perfsuiteinstance.logh.write("Launching the PILE UP %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
136  self.PerfTestPUTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
137  TimerInfo[self.simpleGenReportArgs['Name']].update({'PileUpTime':self.PerfTestPUTimer}) #Add the TimeSize timer to the dictionary
138 
139  else:
140  self.perfsuiteinstance.logh.write("Launching the %s tests on cpu %s with %s events each\n"%(self.simpleGenReportArgs['Name'],self.cpu,self.simpleGenReportArgs['NumEvents']))
141  self.PerfTestTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
142  TimerInfo[self.simpleGenReportArgs['Name']].update({'NoPileUpTime':self.PerfTestTimer}) #Add the TimeSize timer to the dictionary
143  self.perfsuiteinstance.logh.flush()
144  #Cut and paste in bulk, should see if this works...
145  self.perfsuiteinstance.printDate()
146  self.perfsuiteinstance.logh.flush()
147  self.exitcode=self.perfsuiteinstance.simpleGenReport([self.cpu],**(self.simpleGenReportArgs)) #Returning ReportExit code
148  #Stop the timers on the threaded PileUp and NoPileUp tests:
149  if "--pileup" in self.simpleGenReportArgs['cmsdriverOptions']:
150  self.PerfTestPUTimer.set_end(datetime.datetime.now())
151  else:
152  self.PerfTestTimer.set_end(datetime.datetime.now())
153  return self.exitcode
154 
155  #Options handling
156  def optionParse(self,argslist=None):
157  parser = opt.OptionParser(usage='''./cmsPerfSuite.py [options]
158 
159  Examples:
160 
161  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunDigiPileUp TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent FEVTDEBUGHLT --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
162  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP for all TTbar tests defined, i.e. 5 TimeSize, 2 IgProf, 1 Callgrind, 5 Memcheck. The file /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root will be copied locally as INPUT_PILEUP_EVENTS.root and it will be used as the input file for the MixingModule pile up events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
163  OR
164  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root
165  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once)
166  OR
167  cmsPerfSuite.py --step GEN-HLT -t 5 -i 2 -c 1 -m 5 --RunTimeSize MinBias,TTbar --RunIgProf TTbar --RunCallgrind TTbar --RunMemcheck TTbar --RunTimeSizePU TTbar --PUInputFile /store/relval/CMSSW_2_2_1/RelValMinBias/GEN-SIM-DIGI-RAW-HLTDEBUG/IDEAL_V9_v2/0001/101C84AF-56C4-DD11-A90D-001D09F24EC0.root --cmsdriver="--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All"
168  (this will run the suite with 5 events for TimeSize tests on MinBias and TTbar, 2 for IgProf tests on TTbar only, 1 for Callgrind tests on TTbar only, 5 for Memcheck on MinBias and TTbar, it will also run DIGI PILEUP on TTbar but only for 5 TimeSize events. All these tests will be done for the step GEN-HLT, i.e. GEN,SIM,DIGI,L1,DIGI2RAW,HLT at once. It will also add the options "--eventcontent RAWSIM --conditions FrontierConditions_GlobalTag,IDEAL_V9::All" to all cmsDriver.py commands executed by the suite. In addition it will run only 2 cmsDriver.py "steps": "GEN,SIM" and "DIGI". Note the syntax GEN-SIM for combined cmsDriver.py steps)
169 
170  Legal entries for individual candles (--RunTimeSize, --RunIgProf, --RunCallgrind, --RunMemcheck options):
171  %s
172  ''' % ("\n".join(Candles)))
173 
174  parser.set_defaults(TimeSizeEvents = 0 ,
175  IgProfEvents = 0 ,
176  CallgrindEvents = 0 ,
177  MemcheckEvents = 0 ,
178  cmsScimark = 10 ,
179  cmsScimarkLarge = 10 ,
180  cmsdriverOptions = "--eventcontent FEVTDEBUGHLT", # Decided to avoid using the automatic parsing of cmsDriver_highstats_hlt.txt: cmsRelValCmd.get_cmsDriverOptions(), #Get these options automatically now!
181  #"Release Integrators" will create another file relative to the performance suite and the operators will fetch from that file the --cmsdriver option... for now just set the eventcontent since that is needed in order for things to run at all now...
182  stepOptions = "" ,
183  profilers = "" ,
184  outputdir = "" ,
185  logfile = os.path.join(os.getcwd(),"cmsPerfSuite.log"),
186  runonspare = True ,
187  bypasshlt = False ,
188  quicktest = False ,
189  unittest = False ,
190  noexec = False ,
191  dryrun = False ,
192  verbose = True ,
193  create = False ,
194  previousrel = "" ,
195  castordir = self._CASTOR_DIR,
196  cores = cmsCpuInfo.get_NumOfCores(), #Get Number of cpu cores on the machine from /proc/cpuinfo
197  cpu = "1" , #Cpu core on which the suite is run:
198  RunTimeSize = "" ,
199  RunIgProf = "" ,
200  RunCallgrind = "" ,
201  RunMemcheck = "" ,
202  RunDigiPileUP = "" ,
203  RunTimeSizePU = "" ,
204  RunIgProfPU = "" ,
205  RunCallgrindPU = "" ,
206  RunMemcheckPU = "" ,
207  PUInputFile = "" ,
208  userInputFile = "" )
209  parser.add_option('--createIgVol', action="store_true", dest='create',
210  help = 'Create IgProf AFS volume for the release and architecture')
211  parser.add_option('-q', '--quiet' , action="store_false", dest='verbose' ,
212  help = 'Output less information' )
213  parser.add_option('-b', '--bypass-hlt' , action="store_true" , dest='bypasshlt' ,
214  help = 'Bypass HLT root file as input to RAW2DIGI')
215  parser.add_option('-n', '--notrunspare', action="store_false", dest='runonspare',
216  help = 'Do not run cmsScimark on spare cores')
217  parser.add_option('-t', '--timesize' , type='int' , dest='TimeSizeEvents' , metavar='<#EVENTS>' ,
218  help = 'specify the number of events for the TimeSize tests' )
219  parser.add_option('-i', '--igprof' , type='int' , dest='IgProfEvents' , metavar='<#EVENTS>' ,
220  help = 'specify the number of events for the IgProf tests' )
221  parser.add_option('-c', '--callgrind' , type='int' , dest='CallgrindEvents' , metavar='<#EVENTS>' ,
222  help = 'specify the number of events for the Callgrind tests' )
223  parser.add_option('-m', '--memcheck' , type='int' , dest='MemcheckEvents' , metavar='<#EVENTS>' ,
224  help = 'specify the number of events for the Memcheck tests' )
225  parser.add_option('--cmsScimark' , type='int' , dest='cmsScimark' , metavar='' ,
226  help = 'specify the number of times the cmsScimark benchmark is run before and after the performance suite on cpu1')
227  parser.add_option('--cmsScimarkLarge' , type='int' , dest='cmsScimarkLarge' , metavar='' ,
228  help = 'specify the number of times the cmsScimarkLarge benchmark is run before and after the performance suite on cpu1')
229  parser.add_option('--cores' , type='int', dest='cores' , metavar='<CORES>' ,
230  help = 'specify the number of cores of the machine (can be used with 0 to stop cmsScimark from running on the other cores)')
231  parser.add_option('--cmsdriver' , type='string', dest='cmsdriverOptions', metavar='<OPTION_STR>',
232  help = 'specify special options to use with the cmsDriver.py commands (designed for integration build use)')
233  parser.add_option('-a', '--archive' , type='string', dest='castordir' , metavar='<DIR>' ,
234  help = 'specify the wanted CASTOR directory where to store the results tarball')
235  parser.add_option('-L', '--logfile' , type='string', dest='logfile' , metavar='<FILE>' ,
236  help = 'file to store log output of the script')
237  parser.add_option('-o', '--output' , type='string', dest='outputdir' , metavar='<DIR>' ,
238  help = 'specify the directory where to store the output of the script')
239  parser.add_option('-r', '--prevrel' , type='string', dest='previousrel' , metavar='<DIR>' ,
240  help = 'Top level dir of previous release for regression analysis')
241  parser.add_option('--step' , type='string', dest='stepOptions' , metavar='<STEPS>' ,
242  help = 'specify the processing steps intended (instead of the default ones)' )
243  parser.add_option('--cpu' , type='string', dest='cpu' , metavar='<CPU>' ,
244  help = 'specify the core on which to run the performance suite')
245 
246  #Adding new options to put everything configurable at command line:
247  parser.add_option('--RunTimeSize' , type='string', dest='RunTimeSize' , metavar='<CANDLES>' ,
248  help = 'specify on which candles to run the TimeSize tests')
249  parser.add_option('--RunIgProf' , type='string', dest='RunIgProf' , metavar='<CANDLES>' ,
250  help = 'specify on which candles to run the IgProf tests')
251  parser.add_option('--RunCallgrind' , type='string', dest='RunCallgrind' , metavar='<CANDLES>' ,
252  help = 'specify on which candles to run the Callgrind tests')
253  parser.add_option('--RunMemcheck' , type='string', dest='RunMemcheck' , metavar='<CANDLES>' ,
254  help = 'specify on which candles to run the Memcheck tests')
255  parser.add_option('--RunDigiPileUp' , type='string', dest='RunDigiPileUp' , metavar='<CANDLES>' ,
256  help = 'specify the candle on which to run DIGI PILE UP and repeat all the tests set to run on that candle with PILE UP')
257  parser.add_option('--PUInputFile' , type='string', dest='PUInputFile' , metavar='<FILE>' ,
258  help = 'specify the root file to pick the pile-up events from')
259  parser.add_option('--RunTimeSizePU' , type='string', dest='RunTimeSizePU' , metavar='<CANDLES>' ,
260  help = 'specify on which candles to run the TimeSize tests with PILE UP')
261  parser.add_option('--RunIgProfPU' , type='string', dest='RunIgProfPU' , metavar='<CANDLES>' ,
262  help = 'specify on which candles to run the IgProf tests with PILE UP')
263  parser.add_option('--RunCallgrindPU' , type='string', dest='RunCallgrindPU' , metavar='<CANDLES>' ,
264  help = 'specify on which candles to run the Callgrind tests with PILE UP')
265  parser.add_option('--RunMemcheckPU' , type='string', dest='RunMemcheckPU' , metavar='<CANDLES>' ,
266  help = 'specify on which candles to run the Memcheck tests with PILE UP')
267 
268  #Adding a filein option to use pre-processed RAW file for RECO and HLT:
269  parser.add_option('--filein' , type='string', dest='userInputFile' , metavar='<FILE>', #default="",
270  help = 'specify input RAW root file for HLT and RAW2DIGI-RECO (list the files in the same order as the candles for the tests)')
271 
272  #Adding an option to handle additional (to the default user) email addresses to the email notification list (that sends the cmsPerfSuite.log once the performance suite is done running):
273  parser.add_option('--mail', type='string', dest='MailLogRecipients', metavar='<EMAIL ADDRESS>', default=self.user, help='specify valid email address(es) name@domain in order to receive notification at the end of the performance suite running with the cmsPerfSuite.log file')
274 
275  #Adding option to turn off tarball creation at the end of the execution of the performance suite:
276  parser.add_option('--no_tarball', action="store_false", dest='tarball', default=True, help='Turn off automatic tarball creation at the end of the performance suite execution')
277 
278  #####################
279  #
280  # Developer options
281  #
282 
283  devel = opt.OptionGroup(parser, "Developer Options",
284  "Caution: use these options at your own risk."
285  "It is believed that some of them bite.\n")
286 
287  devel.add_option('-p', '--profile' , type="str" , dest='profilers', metavar="<PROFILERS>" ,
288  help = 'Profile codes to use for cmsRelvalInput' )
289  devel.add_option('-f', '--false-run', action="store_true", dest='dryrun' ,
290  help = 'Dry run' )
291  devel.add_option('-d', '--debug' , action='store_true', dest='debug' ,
292  help = 'Debug' )
293  devel.add_option('--quicktest' , action="store_true", dest='quicktest',
294  help = 'Quickly overwrite all the defaults with small numbers so that we can run a quick test of our choosing.' )
295  devel.add_option('--test' , action="store_true", dest='unittest' ,
296  help = 'Perform a simple test, overrides other options. Overrides verbosity and sets it to false.' )
297  devel.add_option('--no_exec' , action="store_true", dest='noexec' ,
298  help = 'Run the suite without executing the cmsRelvalreport.py commands in the various directories. This is a useful debugging tool.' )
299  parser.add_option_group(devel)
300  (options, args) = parser.parse_args(argslist)
301 
302 
303  self._debug = options.debug
304  self._unittest = options.unittest
305  self._noexec = options.noexec
306  self._verbose = options.verbose
307  self._dryrun = options.dryrun
308  create = options.create
309  castordir = options.castordir
310  TimeSizeEvents = options.TimeSizeEvents
311  IgProfEvents = options.IgProfEvents
312  CallgrindEvents = options.CallgrindEvents
313  MemcheckEvents = options.MemcheckEvents
314  cmsScimark = options.cmsScimark
315  cmsScimarkLarge = options.cmsScimarkLarge
316  cmsdriverOptions = options.cmsdriverOptions
317  stepOptions = options.stepOptions
318  quicktest = options.quicktest
319  #candleoption = options.candleOptions
320  runonspare = options.runonspare
321  profilers = options.profilers.strip()
322  cpu = options.cpu.strip()
323  bypasshlt = options.bypasshlt
324  cores = options.cores
325  logfile = options.logfile
326  prevrel = options.previousrel
327  outputdir = options.outputdir
328  RunTimeSize = options.RunTimeSize
329  RunIgProf = options.RunIgProf
330  RunCallgrind = options.RunCallgrind
331  RunMemcheck = options.RunMemcheck
332  RunDigiPileUp = options.RunDigiPileUp
333  RunTimeSizePU = options.RunTimeSizePU
334  RunIgProfPU = options.RunIgProfPU
335  RunCallgrindPU = options.RunCallgrindPU
336  RunMemcheckPU = options.RunMemcheckPU
337  PUInputFile = options.PUInputFile
338  userInputFile = options.userInputFile
339  if options.MailLogRecipients !="" and self.user not in options.MailLogRecipients: #To allow for the --mail "" case of suppressing the email and the default user case
340  MailLogRecipients= self.user+","+options.MailLogRecipients #Add the user by default if there is a mail report
341  else:
342  MailLogRecipients=options.MailLogRecipients
343  tarball = options.tarball
344 
345  #################
346  # Check logfile option
347  #
348  if not logfile == None:
349  logfile = os.path.abspath(logfile)
350  logdir = os.path.dirname(logfile)
351  if not os.path.exists(logdir):
352  parser.error("Directory to output logfile does not exist")
353  sys.exit()
354  logfile = os.path.abspath(logfile)
355 
356  #############
357  # Check step Options
358  #
359  if "GEN,SIM" in stepOptions:
360  self.logh.write("WARNING: Please use GEN-SIM with a hyphen, not a \",\"!\n")
361  #Using the step option as a switch between different dictionaries for:
362  #RunTimeSize,RunIgProf,RunCallgrind,RunMemCheck,RunDigiPileUp:
363  if stepOptions == "" or stepOptions == 'Default':
364  pass
365  else:
366  stepOptions='--usersteps=%s' % (stepOptions)
367 
368  ###############
369  # Check profile option
370  #
371  isnumreg = re.compile("^-?[0-9]*$")
372  found = isnumreg.search(profilers)
373  if not found :
374  parser.error("profile codes option contains non-numbers")
375  sys.exit()
376 
377  ###############
378  # Check output directory option
379  #
380  if outputdir == "":
381  outputdir = os.getcwd()
382  else:
383  outputdir = os.path.abspath(outputdir)
384 
385  if not os.path.isdir(outputdir):
386  parser.error("%s is not a valid output directory" % outputdir)
387  sys.exit()
388 
389  ################
390  # Check cpu option
391  #
392  numetcomreg = re.compile("^[0-9]+(,[0-9]+)*$")
393  if not numetcomreg.search(cpu):
394  parser.error("cpu option needs to be a comma-separated list of ints or a single int")
395  sys.exit()
396 
397  cpustr = cpu
398  cpu = []
399  if "," in cpustr:
400  cpu = [int(x) for x in cpustr.split(",")]
401  else:
402  cpu = [ int(cpustr) ]
403 
404  ################
405  # Check previous release directory
406  #
407  if not prevrel == "":
408  prevrel = os.path.abspath(prevrel)
409  if not os.path.exists(prevrel):
410  self.logh.write("ERROR: Previous release dir %s could not be found" % prevrel)
411  sys.exit()
412 
413  #############
414  # Setup quicktest option
415  #
416  if quicktest:
417  TimeSizeEvents = 1
418  IgProfEvents = 1
419  CallgrindEvents = 0
420  MemcheckEvents = 0
421  cmsScimark = 1
422  cmsScimarkLarge = 1
423 
424  #############
425  # Setup unit test option
426  #
427  if self._unittest:
428  self._verbose = False
429  if stepOptions == "":
430  stepOptions = "GEN-SIM,DIGI,L1,DIGI2RAW,HLT,RAW2DIGI-RECO"
431  cmsScimark = 0
432  cmsScimarkLarge = 0
433  CallgrindEvents = 0
434  MemcheckEvents = 0
435  IgProfEvents = 0
436  TimeSizeEvents = 1
437 
438  #Split all the RunTimeSize etc candles in lists:
439  TimeSizeCandles=[]
440  IgProfCandles=[]
441  CallgrindCandles=[]
442  MemcheckCandles=[]
443  TimeSizePUCandles=[]
444  IgProfPUCandles=[]
445  CallgrindPUCandles=[]
446  MemcheckPUCandles=[]
447  userInputRootFiles=[]
448  if RunTimeSize:
449  TimeSizeCandles = RunTimeSize.split(",")
450  if RunIgProf:
451  IgProfCandles = RunIgProf.split(",")
452  if RunCallgrind:
453  CallgrindCandles = RunCallgrind.split(",")
454  if RunMemcheck:
455  MemcheckCandles = RunMemcheck.split(",")
456  if RunDigiPileUp:
457  for candle in RunDigiPileUp.split(","):
458  if candle in TimeSizeCandles:
459  TimeSizePUCandles.append(candle)
460  if candle in IgProfCandles:
461  IgProfPUCandles.append(candle)
462  if candle in CallgrindCandles:
463  CallgrindPUCandles.append(candle)
464  if candle in MemcheckCandles:
465  MemcheckPUCandles.append(candle)
466  if RunTimeSizePU:
467  TimeSizePUCandles.extend(RunTimeSizePU.split(","))
468  #Some smart removal of duplicates from the list!
469  temp=set(TimeSizePUCandles)
470  TimeSizePUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
471  if RunIgProfPU:
472  IgProfPUCandles.extend(RunIgProfPU.split(","))
473  #Some smart removal of duplicates from the list!
474  temp=set(IgProfPUCandles)
475  IgProfPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
476  if RunCallgrindPU:
477  CallgrindPUCandles.extend(RunCallgrindPU.split(","))
478  #Some smart removal of duplicates from the list!
479  temp=set(CallgrindPUCandles)
480  CallgrindPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
481  if RunMemcheckPU:
482  MemcheckPUCandles.extend(RunMemcheckPU.split(","))
483  #Some smart removal of duplicates from the list!
484  temp=set(MemcheckPUCandles)
485  MemcheckPUCandles=list(temp) #Doing it in 2 steps to avoid potential issues with type of arguments
486  if userInputFile:
487  userInputRootFiles=userInputFile.split(",")
488 
489 
490 
491  #############
492  # Setup cmsdriver and eventual cmsdriverPUoption
493  #
494  cmsdriverPUOptions=""
495  if cmsdriverOptions:
496  #Set the eventual Pile Up cmsdriver options first:
497  if TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles:
498  #Bug fixed: no space between --pileup= and LowLumiPileUp (otherwise could omit the =)
499  cmsdriverPUOptions = '--cmsdriver="%s %s%s"'%(cmsdriverOptions," --pileup=",cmsDriverPileUpOption)
500  #Set the regular ones too:
501  cmsdriverOptions = '--cmsdriver="%s"'%cmsdriverOptions
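# --- Illustrative sketch (editor's addition, not part of the original script): the strings
# --- produced by the block above. pu_opt stands in for cmsDriverPileUpOption (imported from
# --- cmsPerfCommons); "LowLumiPileUp" is only the example mentioned in the comment above.
def _example_driver_option_strings(user_opts="--eventcontent FEVTDEBUGHLT", pu_opt="LowLumiPileUp"):
    cmsdriverPUOptions = '--cmsdriver="%s %s%s"' % (user_opts, " --pileup=", pu_opt)
    # -> '--cmsdriver="--eventcontent FEVTDEBUGHLT  --pileup=LowLumiPileUp"' (no space after '=')
    cmsdriverOptions = '--cmsdriver="%s"' % user_opts
    # -> '--cmsdriver="--eventcontent FEVTDEBUGHLT"'
    return cmsdriverOptions, cmsdriverPUOptions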
502 
503  return (create ,
504  castordir ,
505  TimeSizeEvents ,
506  IgProfEvents ,
507  CallgrindEvents ,
508  MemcheckEvents ,
509  cmsScimark ,
510  cmsScimarkLarge ,
511  cmsdriverOptions,
512  cmsdriverPUOptions,
513  stepOptions ,
514  quicktest ,
515  profilers ,
516  cpu ,
517  cores ,
518  prevrel ,
519  bypasshlt ,
520  runonspare ,
521  outputdir ,
522  logfile ,
523  TimeSizeCandles ,
524  IgProfCandles ,
525  CallgrindCandles,
526  MemcheckCandles ,
527  TimeSizePUCandles ,
528  IgProfPUCandles ,
529  CallgrindPUCandles,
530  MemcheckPUCandles ,
531  PUInputFile ,
532  userInputRootFiles,
533  MailLogRecipients,
534  tarball)
535 
536  #def usage(self):
537  # return __doc__
538 
539  ############
540  # Run a list of commands using system
541  # ! We should rewrite this not to use system (in most cases it is unnecessary)
542  def runCmdSet(self,cmd):
543  exitstat = 0
544  if len(cmd) <= 1:
545  exitstat = self.runcmd(cmd)
546  if self._verbose:
547  self.printFlush(cmd)
548  else:
549  for subcmd in cmd:
550  if self._verbose:
551  self.printFlush(subcmd)
552  exitstat = self.runcmd(" && ".join(cmd))
553  if self._verbose:
554  self.printFlush(self.getDate())
555  return exitstat
556 
557  #############
558  # Print and flush a string (for output to a log file)
559  #
560  def printFlush(self,command):
561  if self._verbose:
562  self.logh.write(str(command) + "\n")
563  self.logh.flush()
564 
565  #############
566  # Run a command and return the exit status
567  #
568  def runcmd(self,command):
569  #Substitute popen with subprocess.Popen!
570  #Using try/except until Popen becomes thread safe (it seems that every time it is called
571  #all processes are checked to reap the ones that are done; this creates a race condition with the wait()
572  #that results in an error with "No child process").
573  #os.popen(command)
574  try:
575  process = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
576  pid=process.pid
577  exitstat= process.wait()
578  cmdout = process.stdout.read()
579  exitstat = process.returncode
580  except OSError as detail:
581  self.logh.write("Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s). Assume it failed!\n %s\n"%(command,pid,detail))
582  self.logh.flush()
583  exitstat=999
584  cmdout="Race condition in subprocess.Popen has robbed us of the exit code of the %s process (PID %s). Assume it failed!\n %s"%(command,pid,detail)
585  if self._verbose:
586  self.logh.write(cmdout)# + "\n") No need of extra \n!
587  self.logh.flush()
588  if exitstat == None:
589  self.logh.write("Something strange is going on! Exit code was None for command %s: check if it really ran!"%command)
590  self.logh.flush()
591  exitstat=0
592  return exitstat
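# --- Illustrative sketch (editor's addition, not part of the original script): a variant of
# --- runcmd() based on communicate(), which drains stdout while waiting and so cannot block
# --- when the child fills the pipe buffer (wait() followed by stdout.read(), as above, can).
# --- This is only a sketch of an alternative, not the method the suite uses.
def _example_runcmd_with_communicate(command, logh):
    import subprocess
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    cmdout, _ = process.communicate()  # reads all output and waits for the child to finish
    if isinstance(cmdout, bytes):      # Python 3 returns bytes when no encoding is given
        cmdout = cmdout.decode(errors="replace")
    logh.write(cmdout)
    logh.flush()
    return process.returncode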
593 
594  def getDate(self):
595  return time.ctime()
596 
597  def printDate(self):
598  self.logh.write(self.getDate() + "\n")
599  self.logh.flush()
600  #############
601  # Make directory for a particular candle and profiler.
602  # ! This is really unnecessary code and should be replaced with an os.makedirs() call
603  def mkCandleDir(self,pfdir,candle,profiler):
604  adir = os.path.join(pfdir,"%s_%s" % (candle,profiler))
605  self.runcmd( "mkdir -p %s" % adir )
606  if self._verbose:
607  self.printDate()
608  return adir
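# --- Illustrative sketch (editor's addition, not part of the original script): the os-level
# --- replacement suggested by the comment above; "mkdir -p" maps onto os.makedirs(), so no
# --- shell round-trip is needed.
def _example_mk_candle_dir(pfdir, candle, profiler):
    adir = os.path.join(pfdir, "%s_%s" % (candle, profiler))
    if not os.path.isdir(adir):  # mimic "mkdir -p": create parents, tolerate an existing dir
        os.makedirs(adir)
    return adir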
609 
610  #############
611  # Copy root file from another candle's directory
612  # ! Again this is messy.
613 
614  def cprootfile(self,dir,candle,NumOfEvents,cmsdriverOptions=""):
615  cmds = ("cd %s" % dir,
616  "cp -pR ../%s_IgProf/%s_GEN,SIM.root ." % (candle,CandFname[candle]))
617 
618  if self.runCmdSet(cmds):
619  self.logh.write("Since there was no ../%s_IgProf/%s_GEN,SIM.root file it will be generated first\n"%(candle,CandFname[candle]))
620 
621  cmd = "cd %s ; cmsDriver.py %s -s GEN,SIM -n %s --fileout %s_GEN,SIM.root %s>& %s_GEN_SIM_for_valgrind.log" % (dir,KeywordToCfi[candle],str(NumOfEvents),candle,cmsdriverOptions,candle)
622 
623  self.printFlush(cmd)
624  #Obsolete popen4-> subprocess.Popen
625  #cmdout=os.popen3(cmd)[2].read()
626  cmdout=subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
627  if cmdout:
628  self.printFlush(cmdout)
629  return cmdout
630 
631  #############
632  # Display G4 cerr errors and CMSExceptions in the logfile
633  #
634  def displayErrors(self,file):
635  try:
636  for line in open(file,"r"):
637  if "cerr" in line or "CMSException" in line:
638  self.logh.write("ERROR: %s\n" % line)
639  self.ERRORS += 1
640  except OSError as detail:
641  self.logh.write("WARNING: %s\n" % detail)
642  self.ERRORS += 1
643  except IOError as detail:
644  self.logh.write("WARNING: %s\n" % detail)
645  self.ERRORS += 1
646 
647  ##############
648  # Filter lines in the valgrind report that match GEN,SIM
649  #
650  def valFilterReport(self,dir):
651  #cmds = ("cd %s" % dir,
652  # "grep -v \"step=GEN,SIM\" SimulationCandles_%s.txt > tmp" % (self.cmssw_version),
653  # "mv tmp SimulationCandles_%s.txt" % (self.cmssw_version))
654  #FIXME:
655  #Quick and dirty hack to have valgrind MemCheck run on 5 events on both GEN,SIM and DIGI in QCD_80_120, while removing the line for GEN,SIM for Callgrind
656  InputFileName=os.path.join(dir,"SimulationCandles_%s.txt"%(self.cmssw_version))
657  InputFile=open(InputFileName,"r")
658  InputLines=InputFile.readlines()
659  InputFile.close()
660  Outputfile=open(InputFileName,"w")
661  simRegxp=re.compile("step=GEN,SIM")
662  digiRegxp=re.compile("step=DIGI")
663  CallgrindRegxp=re.compile("ValgrindFCE")
664  MemcheckRegxp=re.compile("Memcheck")
665  NumEvtRegxp=re.compile("-n 1")#FIXME Either use the ValgrindEventNumber or do a more general match!
666  for line in InputLines:
667  if simRegxp.search(line) and CallgrindRegxp.search(line):
668  continue
669  elif simRegxp.search(line) and MemcheckRegxp.search(line):
670  #Modify
671  if NumEvtRegxp.search(line):
672  line=NumEvtRegxp.sub(r"-n 5",line)
673  else:
674  self.logh.write("The number of Memcheck events was not changed since the original number of events in the line was not 1!\n")
675  Outputfile.write(line)
676  elif digiRegxp.search(line) and MemcheckRegxp.search(line):
677  #Modify
678  if NumEvtRegxp.search(line):
679  line=NumEvtRegxp.sub(r"-n 5",line)
680  else:
681  self.logh.write("The number of Memcheck events was not changed since the original number of events in the line was not 1!\n")
682  Outputfile.write(line)
683  else:
684  Outputfile.write(line)
685  self.logh.flush()
686  Outputfile.close()
687 
688  #self.runCmdSet(cmds)
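# --- Illustrative sketch (editor's addition, not part of the original script): the effect of
# --- valFilterReport() on SimulationCandles lines. The line contents below are schematic, not
# --- copied from a real file:
# ---   "... step=GEN,SIM ... ValgrindFCE ..."        -> line dropped (no GEN,SIM Callgrind)
# ---   "... step=GEN,SIM ... Memcheck ... -n 1 ..."  -> "-n 1" rewritten to "-n 5"
# ---   "... step=DIGI ... Memcheck ... -n 1 ..."     -> "-n 1" rewritten to "-n 5"
# ---   any other line                                -> written back unchanged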
689 
690  ##################
691  # Run cmsScimark benchmarks a number of times
692  #
693  def benchmarks(self,cpu,pfdir,name,bencher,large=False):
694  cmd = self.Commands[cpu][3]
695  redirect = ""
696  if large:
697  redirect = " -large >>"
698  else:
699  redirect = " >>"
700 
701  for i in range(bencher):
702  #Check first for the existence of the file so that we can append:
703  if not os.path.exists(os.path.join(pfdir,os.path.basename(name))):
704  #Equivalent of touch to make sure the file exists so that we can append to it.
705  open(os.path.join(pfdir,os.path.basename(name)),"a").close()
706 
707  command= cmd + redirect + os.path.join(pfdir,os.path.basename(name))
708  self.printFlush(command + " [%s/%s]" % (i+1,bencher))
709  self.runcmd(command)
710  self.logh.flush()
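# --- Illustrative sketch (editor's addition, not part of the original script): the command
# --- assembled above for one iteration, assuming self.Commands[cpu][3] is the taskset-pinned
# --- cmsScimark2 launcher built in runPerfSuite (paths are placeholders):
# ---   regular: taskset -c 1 cmsScimark2 >> <perfsuitedir>/cmsScimark2.log
# ---   large:   taskset -c 1 cmsScimark2 -large >> <perfsuitedir>/cmsScimark2_large.log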
711 
712  ##################
713  # This function is a wrapper around cmsRelvalreport
714  #
715  def runCmsReport(self,cpu,dir,candle):
716  cmd = self.Commands[cpu][1]
717  cmds = ("cd %s" % (dir),
718  "%s -i SimulationCandles_%s.txt -t perfreport_tmp -R -P >& %s.log" % (cmd,self.cmssw_version,candle))
719  exitstat = 0
720  if not self._debug:
721  exitstat = self.runCmdSet(cmds)
722 
723  if self._unittest and (not exitstat == 0):
724  self.logh.write("ERROR: CMS Report returned a non-zero exit status \n")
725  sys.exit(exitstat)
726  else:
727  return(exitstat) #To return the exit code of the cmsRelvalreport.py commands to the runPerfSuite function
728 
729  ##################
730  # Test cmsDriver.py (parses the simcandles file, removing duplicate lines, and runs the cmsDriver part)
731  #
732  def testCmsDriver(self,cpu,dir,cmsver,candle):
733  cmsdrvreg = re.compile("^cmsDriver.py")
734  cmd = self.Commands[cpu][0]
735  noExit = True
736  stepreg = re.compile("--step=([^ ]*)")
737  previousCmdOnline = ""
738  for line in open(os.path.join(dir,"SimulationCandles_%s.txt" % (cmsver))):
739  if (not line.lstrip().startswith("#")) and not (line.isspace() or len(line) == 0):
740  cmdonline = line.split("@@@",1)[0]
741  if cmsdrvreg.search(cmdonline) and not previousCmdOnline == cmdonline:
742  stepbeingrun = "Unknown"
743  matches = stepreg.search(cmdonline)
744  if not matches == None:
745  stepbeingrun = matches.groups()[0]
746  if "PILEUP" in cmdonline:
747  stepbeingrun += "_PILEUP"
748  self.logh.write(cmdonline + "\n")
749  cmds = ("cd %s" % (dir),
750  "%s >& ../cmsdriver_unit_test_%s_%s.log" % (cmdonline,candle,stepbeingrun))
751  if self._dryrun:
752  self.logh.write(str(cmds) + "\n")
753  else:
754  out = self.runCmdSet(cmds)
755  if not out == None:
756  sig = out >> 16 # Get the top 16 bits
757  xstatus = out & 0xffff # Mask out all bits except the first 16
758  self.logh.write("FATAL ERROR: CMS Driver returned a non-zero exit status (which is %s) when running %s for candle %s. Signal interrupt was %s\n" % (xstatus,stepbeingrun,candle,sig))
759  sys.exit()
760  previousCmdOnline = cmdonline
761 
762  ##############
763  # Wrapper for cmsRelvalreportInput
764  #
765  def runCmsInput(self,cpu,dir,numevents,candle,cmsdrvopts,stepopt,profiles,bypasshlt,userInputFile):
766 
767  #Crappy fix for optional options with special syntax (bypasshlt and userInputFile)
768  bypass = ""
769  if bypasshlt:
770  bypass = "--bypass-hlt"
771  userInputFileOption=""
772  if userInputFile:
773  userInputFileOption = "--filein %s"%userInputFile
774  cmd = self.Commands[cpu][2]
775  cmds=[]
776  #print cmds
777  cmds = ("cd %s" % (dir),
778  "%s %s \"%s\" %s %s %s %s %s" % (cmd,
779  numevents,
780  candle,
781  profiles,
782  cmsdrvopts,
783  stepopt,
784  bypass,userInputFileOption))
785  exitstat=0
786  exitstat = self.runCmdSet(cmds)
787  if self._unittest and (not exitstat == 0):
788  self.logh.write("ERROR: CMS Report Input returned a non-zero exit status \n" )
789  return exitstat
790  ##############
791  # Prepares the profiling directory and runs all the selected profiles (if this is not a unit test)
792  #
793  #Using named parameters to facilitate the handling of arguments (especially for the threading use case)
794  def simpleGenReport(self,cpus,perfdir=os.getcwd(),NumEvents=1,candles=['MinBias'],cmsdriverOptions='',stepOptions='',Name='',profilers='',bypasshlt='',userInputRootFiles=''):
795  callgrind = Name == "Callgrind"
796  memcheck = Name == "Memcheck"
797 
798  profCodes = {"TimeSize" : "0123",
799  "IgProf" : "4567",
800  "IgProf_Perf":"47", #Added the Analyse to IgProf_Perf #FIXME: At the moment Analyse is always run whether 7 is selected or not! Issue to solve in cmsRelvalreportInput.py... but not really important (it's always been there, not impacting our use-cases).
801  "IgProf_Mem":"567",
802  "Callgrind": "8",
803  "Memcheck" : "9",
804  None : "-1"}
805 
806  profiles = profCodes[Name]
807  if not profilers == "":
808  profiles = profilers
809 
810  RelvalreportExitCode=0
811 
812  for cpu in cpus:
813  pfdir = perfdir
814  if len(cpus) > 1:
815  pfdir = os.path.join(perfdir,"cpu_%s" % cpu)
816  for candle in candles:
817  #Create the directory for cmsRelvalreport.py running (e.g. MinBias_TimeSize, etc)
818  #Catch the case of PILE UP:
819  if "--pileup" in cmsdriverOptions:
820  candlename=candle+"_PU"
821  else:
822  candlename=candle
823  adir=self.mkCandleDir(pfdir,candlename,Name)
824  if self._unittest:
825  # Run cmsDriver.py
826  if userInputRootFiles:
827  self.logh.write(userInputRootFiles)
828  userInputFile=userInputRootFiles[0]
829  else:
830  userInputFile=""
831  self.logh.flush()
832  self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile)
833  self.testCmsDriver(cpu,adir,candle)
834  else:
835  if userInputRootFiles:
836  self.logh.write("Variable userInputRootFiles is %s\n"%userInputRootFiles)
837  #Need to use regexp, cannot rely on the order... since for different tests there are different candles...
838  #userInputFile=userInputRootFiles[candles.index(candle)]
839  #FIXME:
840  #Note the issue that the input files HAVE to have in their name the candle as is used in cmsPerfSuite.py command line!
841  #This is currently only flagged by a printout in the log: should it be handled by raising an exception?
842  #Will put this in the documentation
843  userInputFile=""
844  candleregexp=re.compile(candle)
845  for file in userInputRootFiles:
846  if candleregexp.search(file):
847  userInputFile=file
848  self.logh.write("For these %s %s tests will use user input file %s\n"%(candlename,Name,userInputFile))
849  if userInputFile == "":
850  self.logh.write("***WARNING: For these %s %s tests could not find a matching input file in %s: will try to do without it!!!!!\n"%(candlename,Name,userInputRootFiles))
851  self.logh.flush()
852  else:
853  userInputFile=""
854  DummyTestName=candlename+"_"+stepOptions.split("=")[1]
855  DummyTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the timer (DummyTimer is just a reference, but we will use the dictionary to access this later...)
856  TimerInfo[Name].update({DummyTestName:DummyTimer}) #Add the TimeSize timer to the dictionary
857  #The following command will create the appropriate SimulationCandlesX.txt file in the relevant directory, ready to run cmsRelvalreport.py on it.
858  self.runCmsInput(cpu,adir,NumEvents,candle,cmsdriverOptions,stepOptions,profiles,bypasshlt,userInputFile)
859  #Here where the no_exec option kicks in (do everything but do not launch cmsRelvalreport.py, it also prevents cmsScimark spawning...):
860  if self._noexec:
861  self.logh.write("Running in debugging mode, without executing cmsRelvalreport.py\n")
862  self.logh.flush()
863  pass
864  else:
865  #The following command will launch cmsRelvalreport.py on the SimulationCandlesX.txt input file created above.
866  ExitCode=self.runCmsReport(cpu,adir,candle)
867  self.logh.write("Individual cmsRelvalreport.py ExitCode %s\n"%ExitCode)
868  RelvalreportExitCode=RelvalreportExitCode+ExitCode
869  self.logh.write("Summed cmsRelvalreport.py ExitCode %s\n"%RelvalreportExitCode)
870  self.logh.flush()
871  DummyTimer.set_end(datetime.datetime.now())
872 
873  #for proflog in proflogs:
874  #With the change from "2>&1 | tee" to ">&" to preserve exit codes, we now need to check all logs...
875  #less nice... we might want to do this externally so that in post-processing it's a re-usable tool
876  globpath = os.path.join(adir,"*.log") #"%s.log"%candle)
877  self.logh.write("Looking for logs that match %s\n" % globpath)
878  logs = glob.glob(globpath)
879  for log in logs:
880  self.logh.write("Found log %s\n" % log)
881  self.displayErrors(log)
882  self.printFlush("Returned cumulative RelvalreportExitCode is %s"%RelvalreportExitCode)
883  return RelvalreportExitCode
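# --- Illustrative sketch (editor's addition, not part of the original script): because
# --- simpleGenReport() takes named parameters, a test can be described as a plain keyword
# --- dictionary and dispatched later (this mirrors the TestsToDo entries built in
# --- runPerfSuite, which also initializes the global TimerInfo dictionary that
# --- simpleGenReport relies on). The helper name and values are for illustration only.
def _example_dispatch_timesize(suite, perfsuitedir):
    args = {'perfdir': perfsuitedir,
            'NumEvents': 100,
            'candles': ['MinBias'],
            'cmsdriverOptions': '--cmsdriver="--eventcontent FEVTDEBUGHLT"',
            'stepOptions': '--usersteps=GEN-SIM,DIGI',
            'Name': 'TimeSize',
            'profilers': '',
            'bypasshlt': False,
            'userInputRootFiles': ''}
    return suite.simpleGenReport([1], **args)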
884 
885  ############
886  # Runs benchmarking, cpu spinlocks on spare cores and profiles selected candles
887  #
888  #FIXME:
889  #Could redesign interface of functions to use keyword arguments:
890  #def runPerfSuite(**opts):
891  #then instead of using castordir variable, would use opts['castordir'] etc
892  def runPerfSuite(self,
893  create = False,
894  #Switching from CASTOR to EOS (using xrdcp instead of rfcp and root://eoscms//eos/ instead of /castor/cern.ch/)
895  #Actually not yet... for consistency we will keep it on CASTOR for now
896  #FIXME! Do the migration, following its implication in PerfDB application!
897  castordir = "/castor/cern.ch/cms/store/relval/performance/",
898  TimeSizeEvents = 100 ,
899  IgProfEvents = 5 ,
900  CallgrindEvents = 1 ,
901  MemcheckEvents = 5 ,
902  cmsScimark = 10 ,
903  cmsScimarkLarge = 10 ,
904  cmsdriverOptions = "" ,#Could use directly cmsRelValCmd.get_Options()
905  cmsdriverPUOptions= "" ,
906  stepOptions = "" ,
907  quicktest = False ,
908  profilers = "" ,
909  cpus = [1] ,
910  cpu_list = [1] ,
911  cores = 4 ,#Could use directly cmsCpuInfo.get_NumOfCores()
912  prevrel = "" ,
913  bypasshlt = False ,
914  runonspare = True ,
915  perfsuitedir = os.getcwd(),
916  logfile = None,
917  TimeSizeCandles = "" ,
918  IgProfCandles = "" ,
919  CallgrindCandles = "" ,
920  MemcheckCandles = "" ,
921  TimeSizePUCandles = "" ,
922  IgProfPUCandles = "" ,
923  CallgrindPUCandles = "" ,
924  MemcheckPUCandles = "" ,
925  PUInputFile = "" ,
926  userInputFile = "" ,
927  MailLogRecipients = "" ,
928  tarball = "" ):
929 
930  #Set up a variable for the FinalExitCode to be used as the sum of exit codes:
931  FinalExitCode=0
932 
933  #Set up the logfile first!
934  if not logfile == None:
935  try:
936  self.logh = open(logfile,"a")
937  except (OSError, IOError) as detail:
938  self.logh.write(str(detail) + "\n")
939  self.logh.flush()
940 
941  #Adding HEPSPEC06 score if available in /build/HEPSPEC06.score file
942  self.HEPSPEC06 = 0 #Set it to 0 by default (so it is easy to catch in the DB too)
943  try:
944  HEPSPEC06_file=open("/build/HEPSPEC06.score","r")
945  for line in HEPSPEC06_file.readlines():
946  if not line.startswith("#") and "HEPSPEC06" in line:
947  self.HEPSPEC06= line.split()[2]
948  except IOError:
949  self.logh.write("***Warning***: Could not find the /build/HEPSPEC06.score file on this machine!\n")
950  self.logh.flush()
951 
952  #Adding a copy of /proc/cpuinfo and /proc/meminfo in the working directory so it can be kept in the tarball on CASTOR:
953  localcpuinfo=os.path.join(perfsuitedir,"cpuinfo")
954  cpuinfo_exitcode=-1
955  if os.path.exists(localcpuinfo):
956  cpuinfo_exitcode=0
957  else:
958  self.logh.write("Copying /proc/cpuinfo into the current working directory (%s)\n"%perfsuitedir)
959  cpuinfo_exitcode=self.runcmd("cp /proc/cpuinfo %s"%perfsuitedir)
960  localmeminfo=os.path.join(perfsuitedir,"meminfo")
961  meminfo_exitcode=-1
962  if os.path.exists(localmeminfo):
963  meminfo_exitcode=0
964  else:
965  self.logh.write("Copying /proc/meminfo into the current working directory (%s)\n"%perfsuitedir)
966  meminfo_exitcode=self.runcmd("cp /proc/meminfo %s"%perfsuitedir)
967  if cpuinfo_exitcode or meminfo_exitcode:
968  self.logh.write("There was an issue copying the cpuinfo or meminfo files!\n")
969  self.logh.flush()
970 
971  try:
972  if not prevrel == "":
973  self.logh.write("Production of regression information has been requested with release directory %s\n" % prevrel)
974  if not cmsdriverOptions == "":
975  self.logh.write("Running cmsDriver.py with user defined options: %s\n" % cmsdriverOptions)
976  #Attach the full option syntax for cmsRelvalreportInput.py:
977  cmsdriverOptionsRelvalInput="--cmsdriver="+cmsdriverOptions
978  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
979  if not stepOptions == "":
980  self.logh.write("Running user defined steps only: %s\n" % stepOptions)
981  #Attach the full option syntax for cmsRelvalreportInput.py:
982  setpOptionsRelvalInput="--usersteps="+stepOptions
983  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
984  if bypasshlt:
985  #Attach the full option syntax for cmsRelvalreportInput.py:
986  bypasshltRelvalInput="--bypass-hlt"
987  #FIXME: should import cmsRelvalreportInput.py and avoid these issues...
988  self.logh.write("Current Architecture is %s\n"%self.cmssw_arch)
989  self.logh.write("Current CMSSW version is %s\n"%self.cmssw_version)
990  self.logh.write("This machine ( %s ) is assumed to have %s cores, and the suite will be run on cpu %s\n" %(self.host,cores,cpus))
991  self.logh.write("This machine's HEPSPEC06 score is: %s \n"%self.HEPSPEC06)
992  path=os.path.abspath(".")
993  self.logh.write("Performance Suite started running at %s on %s in directory %s, run by user %s\n" % (self.getDate(),self.host,path,self.user))
994  #Start the timer for the total performance suite running time:
995  TotalTime=PerfSuiteTimer(start=datetime.datetime.now())
996  #Also initialize the dictionary that will contain all the timing information:
997  global TimerInfo
998  TimerInfo={'TotalTime':{'TotalTime':TotalTime}} #Structure will be {'TestName':{'SubTest':PerfSuiteTimerInstance,...},...}
999  #Obsolete popen4-> subprocess.Popen
1000  #showtags=os.popen4("showtags -r")[1].read()
1001  showtags=subprocess.Popen("showtags -r",shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
1002  self.logh.write(showtags) # + "\n") No need for extra \n!
1003  self.logh.flush()
1004  #For the log:
1005  if self._verbose:
1006  self.logh.write("The performance suite results tarball will be stored in CASTOR at %s\n" % self._CASTOR_DIR)
1007  self.logh.write("%s TimeSize events\n" % TimeSizeEvents)
1008  self.logh.write("%s IgProf events\n" % IgProfEvents)
1009  self.logh.write("%s Callgrind events\n" % CallgrindEvents)
1010  self.logh.write("%s Memcheck events\n" % MemcheckEvents)
1011  self.logh.write("%s cmsScimark benchmarks before starting the tests\n" % cmsScimark)
1012  self.logh.write("%s cmsScimarkLarge benchmarks before starting the tests\n" % cmsScimarkLarge)
1013  self.logh.flush()
1014  #Actual script actions!
1015  #Will have to fix the issue with the matplotlib pie-charts:
1016  #Used to source /afs/cern.ch/user/d/dpiparo/w0/perfreport2.1installation/share/perfreport/init_matplotlib.sh
1017  #Need an alternative in the release
1018 
1019  #Code for the architecture benchmarking use-case
1020  if len(cpus) > 1:
1021  for cpu in cpus:
1022  cpupath = os.path.join(perfsuitedir,"cpu_%s" % cpu)
1023  if not os.path.exists(cpupath):
1024  os.mkdir(cpupath)
1025 
1026  self.Commands = {}
1027  AllScripts = self.Scripts + self.AuxiliaryScripts
1028 
1029  for cpu in range(cmsCpuInfo.get_NumOfCores()): #FIXME use the actual number of cores of the machine here!
1030  self.Commands[cpu] = []
1031 
1032  #Information for the log:
1033  self.logh.write("Full path of all the scripts used in this run of the Performance Suite:\n")
1034  for script in AllScripts:
1035  which="which " + script
1036 
1037  #Logging the actual version of cmsDriver.py, cmsRelvalreport.py, cmsSimPyRelVal.pl
1038  #Obsolete popen4-> subprocess.Popen
1039  #whichstdout=os.popen4(which)[1].read()
1040  whichstdout=subprocess.Popen(which,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read()
1041  self.logh.write(whichstdout) # + "\n") No need of the extra \n!
1042  if script in self.Scripts:
1043  for cpu in range(cmsCpuInfo.get_NumOfCores()):#FIXME use the actual number of cores of the machine here!
1044  command="taskset -c %s %s" % (cpu,script)
1045  self.Commands[cpu].append(command)
1046 
1047  #First submit the cmsScimark benchmarks on the unused cores:
1048  scimark = ""
1049  scimarklarge = ""
1050  if not (self._unittest or self._noexec):
1051  if (len(cpu_list) != cores):
1052  for core in range(cores):
1053  if (not core in cpus) and runonspare:
1054  self.logh.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core) + "\n")
1055  subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (perfsuitedir, str(core))
1056  command="taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
1057  self.logh.write(command + "\n")
1058 
1059  #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
1060  #cpus so it makes no sense to try reading its stdout/err
1061  #Obsolete popen4-> subprocess.Popen
1062  #os.popen4(command)
1063  subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
1064 
1065  self.logh.flush()
1066 
1067  #Don't do benchmarking if in debug mode... saves time
1068  benching = not self._debug
1069  ##FIXME:
1070  #We may want to introduce a switch here or agree on a different default (currently 10 cmsScimark and 10 cmsScimarkLarge)
1071  if benching and not (self._unittest or self._noexec):
1072  #Submit the cmsScimark benchmarks on the cpu where the suite will be run:
1073  for cpu in cpus:
1074  scimark = open(os.path.join(perfsuitedir,"cmsScimark2.log") ,"w")
1075  scimarklarge = open(os.path.join(perfsuitedir,"cmsScimark2_large.log"),"w")
1076  if cmsScimark > 0:
1077  self.logh.write("Starting with %s cmsScimark on cpu%s\n" % (cmsScimark,cpu))
1078  cmsScimarkInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
1079  TimerInfo.update({'cmsScimarkTime':{'cmsScimarkInitial':cmsScimarkInitialTime}}) #Add the cmsScimarkInitialTime information to the general TimerInfo dictionary
1080  self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
1081  cmsScimarkInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimark initial timer
1082 
1083  if cmsScimarkLarge > 0:
1084  self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
1085  cmsScimarkLargeInitialTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLarge PerfSuiteTimer
1086  TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeInitial':cmsScimarkLargeInitialTime}) #Add the cmsScimarkLargeInitialTime information to the general TimerInfo dictionary
1087  self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge, large=True)
1088  cmsScimarkLargeInitialTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Initial timer
1089  self.logh.flush()
1090  #Handling the Pile up input file here:
1091  if (TimeSizePUCandles or IgProfPUCandles or CallgrindPUCandles or MemcheckPUCandles) and not ("FASTSIM" in stepOptions):
1092  #Note the FASTSIM exclusion... since there is no need to copy the file for FASTSIM.
1093  PUInputName=os.path.join(perfsuitedir,"INPUT_PILEUP_EVENTS.root")
1094  if PUInputFile:
1095  #Define the actual command to copy the file locally:
1096  #Allow the file to be mounted locally (or accessible via AFS)
1097  copycmd="cp"
1098  #Allow the file to be on CASTOR (taking a full CASTOR path)
1099  if '/store/relval/' in PUInputFile:
1100  #Switching from CASTOR to EOS, i.e. from rfcp to xrdcp
1101  copycmd="xrdcp"
1102  #Accept plain LFNs from DBS for RelVal CASTOR files:
1103  #Minor fix to allow the case of user using the full path /castor/cern.ch/cms...
1104  if PUInputFile.startswith('/store/relval/'):
1105  #Switching to EOS from CASTOR:
1106  #PUInputFile="/castor/cern.ch/cms"+PUInputFile
1107  PUInputFile="root://eoscms//eos/cms"+PUInputFile
1108  #Copy the file locally
1109  self.logh.write("Copying the file %s locally to %s\n"%(PUInputFile,PUInputName))
1110  self.logh.flush()
1111  GetPUInput=subprocess.Popen("%s %s %s"%(copycmd,PUInputFile,PUInputName), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1112  GetPUInputExitCode=GetPUInput.wait()
1113  #Allow even the potential copy of a local file (even one already named INPUT_PILEUP_EVENTS.root!)
1114  if GetPUInputExitCode:
1115  self.logh.write("The copying of the pile-up input file returned a non-zero exit code: %s \nThis is the stdout+stderr of the command:\n%s\n"%(GetPUInputExitCode,GetPUInput.stdout.read()))
1116  #Ultimately accept the case of the file being already there and not being specified in the --PUInputFile option
1117  if not os.path.exists(PUInputName):
1118  self.logh.write("The necessary INPUT_PILEUP_EVENTS.root file was not found in the working directory %s\nExiting now!"%perfsuitedir)
1119  self.logh.flush()
1120  sys.exit(1)
1121  else:
1122  #Set up here the DIGI PILE UP options
1123  self.printFlush("Some PILE UP tests will be run!")
1124  #Actually setting them earlier... when handling options... May not need this else after all... or just as a log entry.
1125  self.printFlush("cmsdriverPUOptions is %s"%cmsdriverPUOptions)
1126  pass
1127 
1128  #TimeSize tests:
1129  if TimeSizeEvents > 0:
1130  TimeSizeTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1131  TimerInfo.update({'TimeSize':{'TotalTime':TimeSizeTime}}) #Add the TimeSize timer to the dictionary
1132  if TimeSizeCandles:
1133  self.logh.write("Launching the TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
1134  NoPileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1135  TimerInfo['TimeSize'].update({'NoPileUpTime':NoPileUpTime}) #Add the TimeSize No Pile Up tests timer to the list
1136  self.printDate()
1137  self.logh.flush()
1138  ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizeCandles,cmsdriverOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
1139  FinalExitCode=FinalExitCode+ReportExit
1140  #Adding a time stamp here to parse for performance suite running time data
1141  self.printFlush("Regular TimeSize tests were finished at %s"%(self.getDate()))
1142  NoPileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1143 
1144  #Launch eventual Digi Pile Up TimeSize too:
1145  if TimeSizePUCandles:
1146  self.logh.write("Launching the PILE UP TimeSize tests (TimingReport, TimeReport, SimpleMemoryCheck, EdmSize) with %s events each\n" % TimeSizeEvents)
1147  PileUpTime=PerfSuiteTimer(start=datetime.datetime.now()) #Start the TimeSize timer
1148  TimerInfo['TimeSize'].update({'PileUpTime':PileUpTime}) #Add the TimeSize Pile Up tests timer to the list
1149  self.printDate()
1150  self.logh.flush()
1151  ReportExit=self.simpleGenReport(cpus,perfsuitedir,TimeSizeEvents,TimeSizePUCandles,cmsdriverPUOptions,stepOptions,"TimeSize",profilers,bypasshlt,userInputFile)
1152  FinalExitCode=FinalExitCode+ReportExit
1153  #Adding a time stamp here to parse for performance suite running time data
1154  self.printFlush("Pileup TimeSize tests were finished at %s"%(self.getDate()))
1155  PileUpTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1156 
1157  #Check for the inconsistent case of a number of TimeSize events being requested without any TimeSize candles (regular or pileup):
1158  if not (TimeSizeCandles or TimeSizePUCandles):
1159  self.printFlush("A number of events (%s) for TimeSize tests was selected, but no candle for regular or pileup tests was selected!"%(TimeSizeEvents))
1160  #Adding a time stamp here to parse for performance suite running time data
1161  self.printFlush("All TimeSize tests were finished at %s"%(self.getDate()))
1162  TimeSizeTime.set_end(datetime.datetime.now()) #Stop TimeSize timer
1163 
1164  #Stopping all cmsScimark jobs and automatically analysing the logfiles
1165  #No need to waste CPU, since the background load would not affect the Valgrind measurements anyway!
1166  if not (self._unittest or self._noexec):
1167  self.logh.write("Stopping all cmsScimark jobs now\n")
1168  subcmd = "cd %s ; %s" % (perfsuitedir,self.AuxiliaryScripts[2])
1169  stopcmd = "sh -c \"%s\"" % subcmd
1170  self.printFlush(stopcmd)
1171  #os.popen(stopcmd)
1172  #Obsolete popen4-> subprocess.Popen
1173  #self.printFlush(os.popen4(stopcmd)[1].read())
1174  self.printFlush(subprocess.Popen(stopcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read())
1175 
1176  #From here on we can use all available cores to speed up the remaining performance suite tests:
1177  if cores==0: #When specifying the cpu to run the suite on, one has to set cores to 0 to avoid threading of PerfSuite itself...
1178  #So we need to catch this case for the IB tests case where we assign the test to a specific cpu.
1179  AvailableCores=cpus
1180  elif len(cpu_list) == cores: # For the new relval case, when running all the tests on one machine,
1181  # specifying the same number of cores and cpus (like: --cores 3, --cpu 3,4,5)
1182  AvailableCores=cpus
1183  else:
1184  AvailableCores=list(range(cores))
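 #Illustrative examples (hypothetical option values, not executed) of the three cases above:
 # --cpu 3 --cores 0     ->  AvailableCores == [3]            (IB case: the suite is pinned to one cpu)
 # --cpu 3,4,5 --cores 3 ->  AvailableCores == [3, 4, 5]      (relval case: as many cores as cpus were given)
 # defaults (cores=4)    ->  AvailableCores == [0, 1, 2, 3]   (all cores, from range(cores))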
1185 
1186  #Initialize a list that will contain all the simpleGenReport keyword arguments (1 dictionary per test):
1187  TestsToDo=[]
1188  #IgProf tests:
1189  if IgProfEvents > 0:
1190  if IgProfCandles:
1191  self.printFlush("Preparing IgProf tests")
1192  #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
1193  #So in general we want to be able to split the perf and mem tests...
1194  #For the case of the --profiler option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
1195  if profilers:
1196  self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
1197  #Prepare the simpleGenReport arguments for this test:
1198  IgProfProfilerArgs={
1199  'perfdir':perfsuitedir,
1200  'NumEvents':IgProfEvents,
1201  'candles':IgProfCandles,
1202  'cmsdriverOptions':cmsdriverOptions,
1203  'stepOptions':stepOptions,
1204  'Name':"IgProf",
1205  'profilers':profilers,
1206  'bypasshlt':bypasshlt,
1207  'userInputRootFiles':userInputFile
1208  }
1209  #Append the test to the TestsToDo list:
1210  TestsToDo.append(IgProfProfilerArgs)
1211  self.printFlush("Appended IgProf test with profiler option %s to the TestsToDo list"%profilers)
1212  #For the default case (4,5,6,7) we split the tests into 2 jobs since they naturally are 2 cmsRun jobs and for machines with many cores this will
1213  #make the performance suite run faster.
1214  else:
1215  self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
1216  ##PERF##
1217  #Prepare the simpleGenReport arguments for this test:
1218  IgProfPerfArgs={
1219  'perfdir':perfsuitedir,
1220  'NumEvents':IgProfEvents,
1221  'candles':IgProfCandles,
1222  'cmsdriverOptions':cmsdriverOptions,
1223  'stepOptions':stepOptions,
1224  'Name':"IgProf_Perf",
1225  'profilers':profilers,
1226  'bypasshlt':bypasshlt,
1227  'userInputRootFiles':userInputFile
1228  }
1229  #Append the test to the TestsToDo list:
1230  TestsToDo.append(IgProfPerfArgs)
1231  self.printFlush("Appended IgProf PERF test to the TestsToDo list")
1232  ##MEM##
1233  #Prepare the simpleGenReport arguments for this test:
1234  IgProfMemArgs={
1235  'perfdir':perfsuitedir,
1236  'NumEvents':IgProfEvents,
1237  'candles':IgProfCandles,
1238  'cmsdriverOptions':cmsdriverOptions,
1239  'stepOptions':stepOptions,
1240  'Name':"IgProf_Mem",
1241  'profilers':profilers,
1242  'bypasshlt':bypasshlt,
1243  'userInputRootFiles':userInputFile
1244  }
1245  #Append the test to the TestsToDo list:
1246  TestsToDo.append(IgProfMemArgs)
1247  self.printFlush("Appended IgProf MEM test to the TestsToDo list")
1248  #The following will be handled in the while loop that handles the starting of the threads:
1249  #ReportExit=self.simpleGenReport(cpus,perfsuitedir,IgProfEvents,IgProfCandles,cmsdriverOptions,stepOptions,"IgProf",profilers,bypasshlt,userInputFile)
1250  #FinalExitCode=FinalExitCode+ReportExit
1251  #Launch eventual Digi Pile Up IgProf too:
1252  if IgProfPUCandles:
1253  self.printFlush("Preparing IgProf PileUp tests")
1254  #Special case for IgProf: user could pick with the option --profilers to run only IgProf perf or Mem (or Mem_Total alone etc)
1255  #So in general we want to be able to split the perf and mem tests...
1256  #For the case of the --profiler option we will run only 1 test (i.e. it will get one core slot until it is done with whatever profiling was chosen)
1257  if profilers:
1258  self.printFlush("Special profiler option for IgProf was indicated by the user: %s"%profilers)
1259  #Prepare the simpleGenReport arguments for this test:
1260  IgProfProfilerPUArgs={
1261  'perfdir':perfsuitedir,
1262  'NumEvents':IgProfEvents,
1263  'candles':IgProfPUCandles,
1264  'cmsdriverOptions':cmsdriverPUOptions,
1265  'stepOptions':stepOptions,
1266  'Name':"IgProf",
1267  'profilers':profilers,
1268  'bypasshlt':bypasshlt,
1269  'userInputRootFiles':userInputFile
1270  }
1271  #Append the test to the TestsToDo list:
1272  TestsToDo.append(IgProfProfilerPUArgs)
1273  self.printFlush("Appended IgProf PileUp test with profiler option %s to the TestsToDo list"%profilers)
1274  else:
1275  self.printFlush("Splitting the IgProf tests into Perf and Mem to parallelize the cmsRun execution as much as possible:")
1276  ##PERF##
1277  #Prepare the simpleGenReport arguments for this test:
1278  IgProfPerfPUArgs={
1279  'perfdir':perfsuitedir,
1280  'NumEvents':IgProfEvents,
1281  'candles':IgProfPUCandles,
1282  'cmsdriverOptions':cmsdriverPUOptions,
1283  'stepOptions':stepOptions,
1284  'Name':"IgProf_Perf",
1285  'profilers':profilers,
1286  'bypasshlt':bypasshlt,
1287  'userInputRootFiles':userInputFile
1288  }
1289  #Append the test to the TestsToDo list:
1290  TestsToDo.append(IgProfPerfPUArgs)
1291  self.printFlush("Appended IgProf PERF PileUp test to the TestsToDo list")
1292  ##MEM##
1293  #Prepare the simpleGenReport arguments for this test:
1294  IgProfMemPUArgs={
1295  'perfdir':perfsuitedir,
1296  'NumEvents':IgProfEvents,
1297  'candles':IgProfPUCandles,
1298  'cmsdriverOptions':cmsdriverPUOptions,
1299  'stepOptions':stepOptions,
1300  'Name':"IgProf_Mem",
1301  'profilers':profilers,
1302  'bypasshlt':bypasshlt,
1303  'userInputRootFiles':userInputFile
1304  }
1305  #Append the test to the TestsToDo list:
1306  TestsToDo.append(IgProfMemPUArgs)
1307  self.printFlush("Appended IgProf MEM PileUp test to the TestsToDo list")
1308  if not (IgProfCandles or IgProfPUCandles):
1309  self.printFlush("A number of events (%s) for IgProf tests was selected, but no candle for regular or pileup tests was selected!"%(IgProfEvents))
1310 
1311 
1312  #Valgrind tests:
1313  if CallgrindEvents > 0:
1314  if CallgrindCandles:
1315  self.printFlush("Preparing Callgrind tests")
1316  CallgrindArgs={
1317  'perfdir':perfsuitedir,
1318  'NumEvents':CallgrindEvents,
1319  'candles':CallgrindCandles,
1320  'cmsdriverOptions':cmsdriverOptions,
1321  'stepOptions':stepOptions,
1322  'Name':"Callgrind",
1323  'profilers':profilers,
1324  'bypasshlt':bypasshlt,
1325  'userInputRootFiles':userInputFile
1326  }
1327  #Append the test to the TestsToDo list:
1328  TestsToDo.append(CallgrindArgs)
1329  self.printFlush("Appended Callgrind test to the TestsToDo list")
1330  #Launch eventual Digi Pile Up Callgrind too:
1331  if CallgrindPUCandles:
1332  self.printFlush("Preparing Callgrind PileUp tests")
1333  CallgrindPUArgs={
1334  'perfdir':perfsuitedir,
1335  'NumEvents':CallgrindEvents,
1336  'candles':CallgrindPUCandles,
1337  'cmsdriverOptions':cmsdriverPUOptions,
1338  'stepOptions':stepOptions,
1339  'Name':"Callgrind",
1340  'profilers':profilers,
1341  'bypasshlt':bypasshlt,
1342  'userInputRootFiles':userInputFile
1343  }
1344  #Append the test to the TestsToDo list:
1345  TestsToDo.append(CallgrindPUArgs)
1346  self.printFlush("Appended Callgrind PileUp test to the TestsToDo list")
1347  if not (CallgrindCandles or CallgrindPUCandles):
1348  self.printFlush("A number of events (%s) for Callgrind tests was selected, but no candle for regular or pileup tests was selected!"%(CallgrindEvents))
1349 
1350  if MemcheckEvents > 0:
1351  if MemcheckCandles:
1352  self.printFlush("Preparing Memcheck tests")
1353  MemcheckArgs={
1354  'perfdir':perfsuitedir,
1355  'NumEvents':MemcheckEvents,
1356  'candles':MemcheckCandles,
1357  'cmsdriverOptions':cmsdriverOptions,
1358  'stepOptions':stepOptions,
1359  'Name':"Memcheck",
1360  'profilers':profilers,
1361  'bypasshlt':bypasshlt,
1362  'userInputRootFiles':userInputFile
1363  }
1364  #Append the test to the TestsToDo list:
1365  TestsToDo.append(MemcheckArgs)
1366  self.printFlush("Appended Memcheck test to the TestsToDo list")
1367  #Launch eventual Digi Pile Up Memcheck too:
1368  if MemcheckPUCandles:
1369  self.printFlush("Preparing Memcheck PileUp tests")
1370  MemcheckPUArgs={
1371  'perfdir':perfsuitedir,
1372  'NumEvents':MemcheckEvents,
1373  'candles':MemcheckPUCandles,
1374  'cmsdriverOptions':cmsdriverPUOptions,
1375  'stepOptions':stepOptions,
1376  'Name':"Memcheck",
1377  'profilers':profilers,
1378  'bypasshlt':bypasshlt,
1379  'userInputRootFiles':userInputFile
1380  }
1381  #Append the test to the TestsToDo list:
1382  TestsToDo.append(MemcheckPUArgs)
1383  self.printFlush("Appended Memcheck PileUp test to the TestsToDo list")
1384  if not (MemcheckCandles or MemcheckPUCandles):
1385  self.printFlush("A number of events (%s) for Memcheck tests was selected, but no candle for regular or pileup tests was selected!"%(MemcheckEvents))
1386 
1387  #Here if there are any IgProf, Callgrind or MemcheckEvents to be run,
1388  #run the infinite loop that submits the PerfTest() threads on the available cores:
1389  if IgProfEvents or CallgrindEvents or MemcheckEvents:
1390  #FIXME: We should consider what behavior makes most sense for the --cores option; at this time only the cores=0 case is considered...
1391  self.printFlush("Threading all remaining tests on all %s available cores!"%len(AvailableCores))
1392  self.printDate()
1393  self.logh.flush()
1394  #Save the original AvailableCores list to use it as a test to break the infinite loop:
1395  #While in the regular RelVal use-case it makes sense to use the actual number of cores of the machine, in
1396  #the IB case the AvailableCores will always consist of only one core...
1397  OriginalAvailableCores=list(AvailableCores) #Tricky list copy bug! Without the list() call, OriginalAvailableCores would point to the same list as AvailableCores!
1398  #Print this out in the log for debugging reasons
1399  self.printFlush("Original available cores list: %s"%AvailableCores)
1400 
1401  #Create a dictionary to keep track of running threads on the various cores:
1402  activePerfTestThreads={}
1403  #Flag for waiting messages:
1404  Waiting=False
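 #The loop below acts as a simple scheduler: whenever a core is free it pops one entry from
 #TestsToDo and starts it in a simpleGenReportThread pinned to that core; when a thread
 #finishes, its core is returned to AvailableCores. The loop exits once TestsToDo is empty
 #and all of the originally available cores are free again.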
1405  while True:
1406  #Check if there are tests to run:
1407  if TestsToDo:
1408  #Using the Waiting flag to avoid writing this message every 5 seconds in the case
1409  #of having more tests to do than available cores...
1410  if not Waiting:
1411  self.printFlush("Currently %s tests are scheduled to be run:"%len(TestsToDo))
1412  self.printFlush(TestsToDo)
1413  #Check the available cores:
1414  if AvailableCores:
1415  #Set waiting flag to False since we'll be doing something
1416  Waiting=False
1417  self.printFlush("There is/are %s core(s) available"%len(AvailableCores))
1418  cpu=AvailableCores.pop()
1419  self.printFlush("Let's use cpu %s"%cpu)
1420  simpleGenReportArgs=TestsToDo.pop()
1421  self.printFlush("Let's submit %s test on core %s"%(simpleGenReportArgs['Name'],cpu))
1422  #Adding a Total timer for each of the threaded tests:
1423  if simpleGenReportArgs['Name'] not in TimerInfo.keys():
1424  #if 'TotalTime' not in TimerInfo[simpleGenReportArgs['Name']].keys():
1425  self.PerfTestTotalTimer=PerfSuiteTimer(start=datetime.datetime.now()) #Start the total timer for this test type
1426  TimerInfo.update({simpleGenReportArgs['Name']:{'TotalTime':self.PerfTestTotalTimer}}) #Add this test's total timer to the dictionary
1427  threadToDo=self.simpleGenReportThread(cpu,self,**simpleGenReportArgs) #Need to send self too, so that the thread has access to the PerfSuite.simpleGenReport() function
1428  self.printFlush("Starting thread %s"%threadToDo)
1429  threadToDo.start() #Note: Thread.start() returns None, so no report exit code is available at this point
1430  self.printFlush("Adding thread %s to the list of active threads"%threadToDo)
1431  activePerfTestThreads[cpu]=threadToDo
1432  #If there is no available core, do nothing here: the active threads are checked below, followed by a short sleep and another pass of the loop.
1433  else:
1434  pass
1435  #Test activePerfThreads:
1436  activeTestNames=[]
1437  activeTestNamesPU=[]
1438  for cpu in activePerfTestThreads.keys():
1439  if activePerfTestThreads[cpu].isAlive():
1440  #print "%% cpu %s activerPerfTestThreads[cpu] %s activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'] %s"%(cpu,activePerfTestThreads[cpu],activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions'])
1441  if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
1442  activeTestNamesPU.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1443  else:
1444  activeTestNames.append(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1445  pass
1446  elif cpu not in AvailableCores:
1447  #Set waiting flag to False since we'll be doing something
1448  Waiting=False
1449  self.printFlush(time.ctime())
1450  self.printFlush("%s test, in thread %s is done running on core %s"%(activePerfTestThreads[cpu].simpleGenReportArgs['Name'],activePerfTestThreads[cpu],cpu) )
1451  self.printFlush("About to append cpu %s to AvailableCores list"%cpu)
1452  AvailableCores.append(cpu)
1453  #Eliminate from activeTestNames lists:
1454  #print activeTestNames
1455  #print activeTestNamesPU
1456  #print activePerfTestThreads[cpu].simpleGenReportArgs['Name']
1457  if "--pileup" in activePerfTestThreads[cpu].simpleGenReportArgs['cmsdriverOptions']:
1458  try:
1459  activeTestNamesPU.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1460  except ValueError: #The name was not in the list (e.g. already removed)
1461  pass
1462  else:
1463  try:
1464  activeTestNames.remove(activePerfTestThreads[cpu].simpleGenReportArgs['Name'])
1465  except ValueError: #The name was not in the list (e.g. already removed)
1466  pass
1467  #Eliminate also from the activePerfTestThreads dictionary:
1468  activePerfTestThreads.pop(cpu)
1469  #FIXME:
1470  #Delicate check to stop the timer on the individual threaded test!
1471  #Need to think about it still...
1472  #FIXME:
1473  #Delicate check to stop the timers on the threaded tests:
1474  #Check the activePerfTestThreads dictionary for "Name": if any name is missing, the total timer can be stopped for that name.
1475  #self.PerfTestTotalTimer
1476  for TestName in ["IgProf_Perf","IgProf_Mem","IgProf","Memcheck","Callgrind"]:
1477  if (TestName not in activeTestNames) and (TestName not in activeTestNamesPU) :
1478  try:
1479  TimerInfo[TestName]['TotalTime'].set_end(datetime.datetime.now())
1480  except KeyError: #No timer exists for this test (it was never scheduled)
1481  #print "No %s test was running"%TestName
1482  pass
1483  #Note: this check may be buggy... it seems we don't wait for the running threads to be finished...
1484  #We should require that:
1485  #-all OriginalAvailableCores are actually available again.
1486  if not AvailableCores==[] and (set(AvailableCores)==set(range(cmsCpuInfo.get_NumOfCores())) or set(AvailableCores)==set(OriginalAvailableCores)) and not TestsToDo:
1487  self.printFlush("PHEW! We're done... all TestsToDo are done... at %s "%(self.getDate()))
1488  #Debug printouts:
1489  #print "AvailableCores",AvailableCores
1490  #print "set(AvailableCores)",set(AvailableCores)
1491  #print "set(range(cmsCpuInfo.get_NumOfCores())",set(range(cmsCpuInfo.get_NumOfCores()))
1492  #print "OriginalAvailableCores",OriginalAvailableCores
1493  #print "set(OriginalAvailableCores)",set(OriginalAvailableCores)
1494  #print "TestsToDo",TestsToDo
1495  break
1496  else:
1497  #Putting the sleep statement first to avoid writing Waiting... before the output of the started thread reaches the log...
1498  time.sleep(5)
1499  #Use the Waiting flag to write only one waiting message while waiting, instead of one message every 5 seconds...
1500  if not Waiting:
1501  self.printFlush(time.ctime())
1502  self.printFlush("Waiting for tests to be done...")
1503  sys.stdout.flush()
1504  Waiting=True
1505  #End of the if for IgProf, Callgrind, Memcheck tests
1506 
1507  if benching and not (self._unittest or self._noexec):
1508  #Ending the performance suite with the cmsScimark benchmarks again:
1509  for cpu in cpus:
1510  if cmsScimark > 0:
1511  self.logh.write("Ending with %s cmsScimark on cpu%s\n" % (cmsScimark,cpu))
1512  cmsScimarkFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimark PerfSuiteTimer
1513  TimerInfo['cmsScimarkTime'].update({'cmsScimarkFinal':cmsScimarkFinalTime}) #Add the cmsScimarkFinalTime information to the general TimerInfo dictionary
1514 
1515  self.benchmarks(cpu,perfsuitedir,scimark.name,cmsScimark)
1516  cmsScimarkFinalTime.set_end(datetime.datetime.now()) #Stop the cmsScimark Final timer
1517  if cmsScimarkLarge > 0:
1518  self.logh.write("Following with %s cmsScimarkLarge on cpu%s\n" % (cmsScimarkLarge,cpu))
1519  cmsScimarkLargeFinalTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the cmsScimarkLarge Final PerfSuiteTimer
1520  TimerInfo['cmsScimarkTime'].update({'cmsScimarkLargeFinal':cmsScimarkLargeFinalTime}) #Add the cmsScimarkLargeFinalTime information to the general TimerInfo dictionary
1521  self.benchmarks(cpu,perfsuitedir,scimarklarge.name,cmsScimarkLarge,large=True)
1522  cmsScimarkLargeFinalTime.set_end(datetime.datetime.now()) #Stop the cmsScimarkLarge Final timer
1523 
1524  if prevrel:
1525  self.logh.write("Running the regression analysis with respect to %s\n"%getVerFromLog(prevrel))
1526  self.logh.write(time.ctime(time.time()))
1527  self.logh.flush()
1528 
1529  crr.regressReports(prevrel,os.path.abspath(perfsuitedir),oldRelName = getVerFromLog(prevrel),newRelName=self.cmssw_version)
1530 
1531  #Create a tarball of the work directory
1532  if tarball:
1533  tarballTime=PerfSuiteTimer(start=datetime.datetime.now()) #Create the tarball PerfSuiteTimer
1534  TimerInfo.update({'tarballTime':{'TotalTime':tarballTime}})
1535  # Adding the str(stepOptions) to distinguish the tarballs for 1 release
1536  # (GEN->DIGI, L1->RECO will be run in parallel)
1537 
1538  # Cleaning the stepOptions from the --usersteps=:
1539  if "=" in str(stepOptions):
1540  fileStepOption=str(stepOptions).split("=")[1]
1541  else:
1542  fileStepOption=str(stepOptions)
1543  if fileStepOption=="":
1544  fileStepOption="UnknownStep"
1545  # Add the working directory used to avoid overwriting castor files (also put a check...)
1546  fileWorkingDir=os.path.basename(perfsuitedir)
1547 
1548  # Also add the --conditions and --eventcontent options used in the --cmsdriver options since it
1549  # is possible that the same tests will be run with different conditions and/or event content:
1550  # Parse it out of --cmsdriver option:
1551  fileEventContentOption="UnknownEventContent"
1552  fileConditionsOption="UnknownConditions"
1553  for token in cmsdriverOptions.split("--"):
1554  if token!='' and 'cmsdriver' not in token:
1555  if "=" in token:
1556  fileOption=token.split("=")[0]
1557  fileOptionValue=token.split("=")[1].strip("'").strip('"')
1558  else:
1559  fileOption=token.split()[0]
1560  fileOptionValue=token.split()[1].strip("'").strip('"')
1561  if "eventcontent" in fileOption or "conditions" in fileOption:
1562  if "eventcontent" in fileOption:
1563  fileEventContentOption=fileOptionValue
1564  elif "conditions" in fileOption:
1565  # check if we are using the autoCond style of flexible conditions
1566  # if so, expand the condition here so that the file names contain the real conditions
1567  if "auto:" in fileOptionValue:
1568  from Configuration.AlCa.autoCond import autoCond
1569  fileConditionsOption = autoCond[ fileOptionValue.split(':')[1] ]
1570  else:
1571  # "old style" conditions, hardcoded values ...
1572  # FIXME:
1573  # Should put at least the convention in cmsPerfCommons to know how to parse it...
1574  # Potential weak point if the conditions tag convention changes...
1575  if "," in fileOptionValue: #Since 330, conditions don't have FrontierConditions_GlobalTag, in front of them anymore...
1576  fileConditionsOption=fileOptionValue.split("::")[0].split(",")[1] #"Backward" compatibility
1577  else:
1578  fileConditionsOption=fileOptionValue.split("::")[0]
1579  else: # empty token
1580  #print "Print this is the token: %s"%token
1581  pass
1582 
1583  #self.printFlush("Conditions label to add to the tarball name is %s"%fileConditionsOption)
1584  #self.printFlush("Eventcontent label to add to the tarball name is %s"%fileEventContentOption)
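 #Illustrative example (hypothetical cmsDriver options, not executed): for something like
 # --conditions auto:mc --eventcontent FEVTDEBUGHLT
 #the parsing above would set fileEventContentOption to "FEVTDEBUGHLT" and fileConditionsOption
 #to whatever global tag the autoCond dictionary maps the "mc" key to.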
1585  #FIXME:
1586  #Could add the allowed event contents in the cmsPerfCommons.py file and use those to match in the command line options... This assumes maintenance of cmsPerfCommons.py
1587 
1588 
1589  #Create a tarball with just the logfiles
1590  subprocess.Popen("ls -R | grep .root > rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1591  LogFile = "%s_%s_%s_%s_%s_%s_%s_%s_log.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
1592  AbsTarFileLOG = os.path.join(perfsuitedir,LogFile)
1593  tarcmd = "tar zcfX %s %s %s" %(AbsTarFileLOG, "rootFiles", os.path.join(perfsuitedir,"*"))
1594  self.printFlush("Creating a tarball for the logfiles")
1595  self.printFlush(tarcmd)
1596  self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1597  self.printFlush(subprocess.Popen("rm rootFiles",shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1598 
1599  fullcastorpathlog=os.path.join(castordir,LogFile)
1600 
1601 
1602  #Create the tarball with the contents of the directory + md5 checksum
1603  TarFile = "%s_%s_%s_%s_%s_%s_%s_%s.tgz" % (self.cmssw_arch, self.cmssw_version, fileStepOption, fileConditionsOption, fileEventContentOption.split()[0], fileWorkingDir, self.host, self.user)
1604  AbsTarFile = os.path.join(perfsuitedir,TarFile)
1605  tarcmd = "tar -zcf %s %s" %(AbsTarFile, os.path.join(perfsuitedir,"*"))
1606  md5cmd = "md5sum %s" %(AbsTarFile)
1607  self.printFlush("Creating a tarball with the content of the directory")
1608  self.printFlush(tarcmd)
1609  self.printFlush(md5cmd)
1610  #FIXME:
1611  #Anything that will be logged after the tar command below will not enter the cmsPerfSuite.log in the tarball (by definition)...
1612  #To remain backward compatible the harvesting script needs to be based on the command above to identify the tarball location.
1613  #Obsolete popen4-> subprocess.Popen
1614  #self.printFlush(os.popen3(tarcmd)[2].read()) #Using popen3 to get only stderr we don't want the whole stdout of tar!
1615  self.printFlush(subprocess.Popen(tarcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1616  md5sum = subprocess.Popen(md5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read().split()[0]
1617  self.printFlush("The md5 checksum of the tarball: %s" %(md5sum))
1618  AbsTarFileMD5 = AbsTarFile + ".md5"
1619  md5filecmd = "echo %s > %s" % (md5sum, AbsTarFileMD5)
1620  self.printFlush(subprocess.Popen(md5filecmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read())
1621 
1622  #Archive it on CASTOR
1623  #Before archiving, check if it already exists: if it does, print a message, but do not overwrite, so do not delete it from the local dir:
1624  fullcastorpathfile=os.path.join(castordir,TarFile)
1625  fullcastorpathmd5=os.path.join(castordir,TarFile + ".md5")
1626 
1627  checkcastor="nsls %s" % fullcastorpathfile
1628  #Obsolete os.popen-> subprocess.Popen
1629  #checkcastorout=os.popen3(checkcastor)[1].read()
1630  checkcastorout=subprocess.Popen(checkcastor,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.read()
1631  if checkcastorout.rstrip()==fullcastorpathfile:
1632  castorcmdstderr="File %s is already on CASTOR! Will NOT OVERWRITE!!!"%fullcastorpathfile
1633  else:
1634  #Switching from CASTOR TO EOS, i.e. rfcp to xrdcp!
1635  #Not YET!!!
1636  #FIXME! Migrate to EOS eventually, taking into account implications for PerfDB logs linking!
1637  castorcmd="rfcp %s %s" % (AbsTarFile,fullcastorpathfile)
1638  castormd5cmd="rfcp %s %s" % (AbsTarFileMD5,fullcastorpathmd5)
1639  castorlogcmd="rfcp %s %s" % (AbsTarFileLOG,fullcastorpathlog)
1640  self.printFlush(castorcmd)
1641  self.printFlush(castormd5cmd)
1642  self.printFlush(castorlogcmd)
1643  #Obsolete os.popen-> subprocess.Popen
1644  #castorcmdstderr=os.popen3(castorcmd)[2].read()
1645  castorcmdstderr=subprocess.Popen(castorcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1646  subprocess.Popen(castormd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1647  subprocess.Popen(castorlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stderr.read()
1648  #Checking the stderr of the rfcp command to copy the tarball (.tgz) on CASTOR:
1649  if castorcmdstderr:
1650  #If it failed print the stderr message to the log and tell the user the tarball (.tgz) is kept in the working directory
1651  self.printFlush(castorcmdstderr)
1652  self.printFlush("Since the CASTOR archiving for the tarball failed the file %s is kept in directory %s"%(TarFile, perfsuitedir))
1653  else:
1654  #If it was successful then remove the tarball from the working directory:
1655  self.printFlush("Successfully archived the tarball %s in CASTOR!"%(TarFile))
1656  self.printFlush("The tarball can be found: %s"%(fullcastorpathfile))
1657  self.printFlush("The logfile can be found: %s"%(fullcastorpathlog))
1658  self.printFlush("Deleting the local copy of the tarballs")
1659  rmtarballcmd="rm -Rf %s"%(AbsTarFile)
1660  rmtarballmd5cmd="rm -Rf %s"%(AbsTarFileMD5)
1661  rmtarballlogcmd="rm -Rf %s"%(AbsTarFileLOG)
1662  self.printFlush(rmtarballcmd)
1663  self.printFlush(rmtarballmd5cmd)
1664  self.printFlush(rmtarballlogcmd)
1665  #Obsolete os.popen-> subprocess.Popen
1666  #self.printFlush(os.popen4(rmtarballcmd)[1].read())
1667  self.printFlush(subprocess.Popen(rmtarballcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1668  self.printFlush(subprocess.Popen(rmtarballmd5cmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1669  self.printFlush(subprocess.Popen(rmtarballlogcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1670  tarballTime.set_end(datetime.datetime.now())
1671  else:
1672  self.printFlush("Performance Suite directory will not be archived in a tarball since --no_tarball option was chosen")
1673 
1674  #End of script actions!
1675 
1676  #Print a time stamp at the end:
1677  date=time.ctime(time.time())
1678  self.logh.write("Performance Suite finished running at %s on %s in directory %s\n" % (date,self.host,path))
1679  if self.ERRORS == 0:
1680  self.logh.write("There were no errors detected in any of the log files!\n")
1681  else:
1682  self.logh.write("ERROR: There were %s errors detected in the log files, please revise!\n" % self.ERRORS)
1683  #print "No exit code test"
1684  #sys.exit(1)
1685  except exceptions.Exception as detail:
1686  self.logh.write(str(detail) + "\n")
1687  self.logh.flush()
1688  if not self.logh.isatty():
1689  self.logh.close()
1690  raise
1691  #Add the possibility to send as an email the execution logfile to the user and whoever else interested:
1692  if MailLogRecipients != "": #Basically leave the option to turn it off too.. --mail ""
1693  self.printFlush("Sending email notification for this execution of the performance suite with command:")
1694  sendLogByMailcmd='cat cmsPerfSuite.log |mail -s "Performance Suite finished running on %s" '%self.host + MailLogRecipients
1695  self.printFlush(sendLogByMailcmd)
1696  #Obsolete os.popen-> subprocess.Popen
1697  #self.printFlush(os.popen4(sendLogByMailcmd)[1].read())
1698  self.printFlush(subprocess.Popen(sendLogByMailcmd,shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.read() )
1699  else:
1700  self.printFlush('No email notification will be sent for this execution of the performance suite since option --mail "" was used')
1701 
1702  TotalTime.set_end(datetime.datetime.now())
1703  self.printFlush("Total Running Time\t%s hrs (%s mins)"%(TotalTime.get_duration()['hours'],TotalTime.get_duration()['minutes']))
1704 
1705  #Dump of the TimerInfo information
1706  #First dump it as a pickle file...
1707  #In order to do so without the complication of serializing a custom class instance, we need to convert the dictionary contents to plain strings:
1708  TimerInfoStr={}
1709  PerfSuiteTimerInfo=open("PerfSuiteTimerInfo.pkl","wb")
1710  #pickle.dump(TimerInfo,PerfSuiteTimerInfo)
1711  #PerfSuiteTimerInfo.close()
1712  #For now print it at the bottom of the log:
1713  self.logh.write("Test type\tActual Test\tDuration\tStart Time\tEnd Time\n")
1714  for key in TimerInfo.keys():
1715  #self.printFlush(key)
1716  TimerInfoStr.update({key:{}})
1717  for test in TimerInfo[key].keys():
1718  TimerInfoStr[key].update({test:[str(TimerInfo[key][test].get_duration()['hours'])+" hrs ("+str(TimerInfo[key][test].get_duration()['minutes'])+" mins)",TimerInfo[key][test].get_start(),TimerInfo[key][test].get_end()]})
1719  self.logh.write(key+"\t"+test+"\t")
1720  self.logh.write("%s hrs (%s mins)\t"%(TimerInfo[key][test].get_duration()['hours'],TimerInfo[key][test].get_duration()['minutes']))
1721  self.logh.write("%s\t"%TimerInfo[key][test].get_start())
1722  self.logh.write("%s\n"%TimerInfo[key][test].get_end())
1723  pickle.dump(TimerInfoStr,PerfSuiteTimerInfo)
1724  PerfSuiteTimerInfo.close()
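 #A minimal sketch (not executed here) of how the pickled timing summary could be read back
 #in a later analysis session:
 # import pickle
 # with open("PerfSuiteTimerInfo.pkl", "rb") as pklfile:
 #     TimerInfoStr = pickle.load(pklfile)
 # for testtype, tests in TimerInfoStr.items():
 #     for testname, (duration, start, end) in tests.items():
 #         print(testtype, testname, duration, start, end)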
1725 
1726  self.logh.write("Final Performance Suite exit code was %s\n"%FinalExitCode)
1727  self.logh.flush()
1728  sys.exit(FinalExitCode)
1729 
1730 def main(argv=[__name__]): #argv is a list of arguments.
1731  #Valid ways to call main with arguments:
1732  #main(["--cmsScimark",10])
1733  #main(["-t100"]) #With the caveat that the options.timeSize will be of type string... so should avoid using this!
1734  #main(["--timeSize",100])
1735  #Invalid ways:
1736  #main(["One string with all options"])
1737 
1738  #Let's instantiate the class:
1739  suite=PerfSuite()
1740 
1741  #print suite
1742  #Uncomment this for tests with main() in interactive python:
1743  #print suite.optionParse(argv)
1744 
1745  PerfSuiteArgs={}
1746  (PerfSuiteArgs['create'],
1747  PerfSuiteArgs['castordir'],
1748  PerfSuiteArgs['TimeSizeEvents'],
1749  PerfSuiteArgs['IgProfEvents'],
1750  PerfSuiteArgs['CallgrindEvents'],
1751  PerfSuiteArgs['MemcheckEvents'],
1752  PerfSuiteArgs['cmsScimark'],
1753  PerfSuiteArgs['cmsScimarkLarge'],
1754  PerfSuiteArgs['cmsdriverOptions'],
1755  PerfSuiteArgs['cmsdriverPUOptions'],
1756  PerfSuiteArgs['stepOptions'],
1757  PerfSuiteArgs['quicktest'],
1758  PerfSuiteArgs['profilers'],
1759  PerfSuiteArgs['cpus'],
1760  PerfSuiteArgs['cores'],
1761  PerfSuiteArgs['prevrel'],
1762  PerfSuiteArgs['bypasshlt'],
1763  PerfSuiteArgs['runonspare'],
1764  PerfSuiteArgs['perfsuitedir'],
1765  PerfSuiteArgs['logfile'],
1766  PerfSuiteArgs['TimeSizeCandles'],
1767  PerfSuiteArgs['IgProfCandles'],
1768  PerfSuiteArgs['CallgrindCandles'],
1769  PerfSuiteArgs['MemcheckCandles'],
1770  PerfSuiteArgs['TimeSizePUCandles'],
1771  PerfSuiteArgs['IgProfPUCandles'],
1772  PerfSuiteArgs['CallgrindPUCandles'],
1773  PerfSuiteArgs['MemcheckPUCandles'],
1774  PerfSuiteArgs['PUInputFile'],
1775  PerfSuiteArgs['userInputFile'],
1776  PerfSuiteArgs['MailLogRecipients'],
1777  PerfSuiteArgs['tarball']
1778  ) = suite.optionParse(argv)
1779 
1780  if PerfSuiteArgs['create']: # Before anything, request the AFS volume (it takes some time...)
1781  suite.createIgVolume()
1782 
1783  if PerfSuiteArgs['logfile'] is not None:
1784  if os.path.exists(PerfSuiteArgs['logfile']):
1785  oldlogfile=PerfSuiteArgs['logfile']+"_"+time.strftime("%d-%m-%Y_%H:%M:%S")
1786  #Move old logfile to a file with the same filename plus a timestamp appended
1787  mvOldLogfile=subprocess.Popen("mv %s %s"%(PerfSuiteArgs['logfile'],oldlogfile), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1788  mvOldLogfileExitCode=mvOldLogfile.wait()
1789  #Finally open the logfile and put the information above in it:
1790  try:
1791  ActualLogfile = open(PerfSuiteArgs['logfile'],"w")
1792  if mvOldLogfileExitCode:
1793  ActualLogfile.write("Please check what happened: A file named %s existed already and the attempt to move it to %s produced the following output: %s\n"%(PerfSuiteArgs['logfile'],oldlogfile,mvOldLogfile.stdout.read()))
1794  else:
1795  ActualLogfile.write("***WARNING! A file named %s existed already!\n***It has been moved to %s before starting the current logfile!\n"%(PerfSuiteArgs['logfile'],oldlogfile))
1796  except (OSError, IOError) as detail:
1797  sys.stderr.write("Failed to open the intended logfile %s, detail error:\n%s"%(PerfSuiteArgs['logfile'],detail))
1798 
1799  else:
1800  try:
1801  ActualLogfile = open(PerfSuiteArgs['logfile'],"w")
1802  except (OSError, IOError) as detail:
1803  sys.stderr.write("Failed to open the intended logfile %s, detail error:\n%s"%(PerfSuiteArgs['logfile'],detail))
1804  ActualLogfile.flush()
1805 
1806  #Three lines to add the exact command line used to call the performance suite directly in the log.
1807  ActualLogfile.write("Performance suite invoked with command line:\n")
1808  cmdline=" ".join(sys.argv)
1809  ActualLogfile.write(cmdline+"\n")
1810  ActualLogfile.flush()
1811 
1812  #Debug printout that we could silence...
1813  ActualLogfile.write("Initial PerfSuite Arguments:\n")
1814  for key in PerfSuiteArgs.keys():
1815  ActualLogfile.write("%s %s\n"%(key,PerfSuiteArgs[key]))
1816  ActualLogfile.flush()
1817  #print PerfSuiteArgs
1818 
1819  PerfSuiteArgs['cpu_list'] = PerfSuiteArgs['cpus'] #To access the actual number of cpus used inside the threads..
1820 
1821  #Handle in here the case of multiple cores and the loading of cores with cmsScimark:
1822  if len(PerfSuiteArgs['cpus']) > 1:
1823  ActualLogfile.write("More than 1 cpu: threading the Performance Suite!\n")
1824  outputdir=PerfSuiteArgs['perfsuitedir']
1825  runonspare=PerfSuiteArgs['runonspare'] #Save the original value of runonspare for cmsScimark stuff
1826  cpus=PerfSuiteArgs['cpus']
1827  cores=PerfSuiteArgs['cores']
1828  if runonspare:
1829  for core in range(PerfSuiteArgs['cores']):
1830  cmsScimarkLaunch_pslist={}
1831  if len(cpus) != cores: #In this case (relval), don't load the other cores with cmsScimark
1832  if (core not in cpus):
1833  #self.logh.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core) + "\n")
1834  ActualLogfile.write("Submitting cmsScimarkLaunch.csh to run on core cpu "+str(core)+"\n")
1835  subcmd = "cd %s ; cmsScimarkLaunch.csh %s" % (outputdir, str(core))
1836  command="taskset -c %s sh -c \"%s\" &" % (str(core), subcmd)
1837  #self.logh.write(command + "\n")
1838  ActualLogfile.write(command+"\n")
1839  #cmsScimarkLaunch.csh is an infinite loop to spawn cmsScimark2 on the other
1840  #cpus so it makes no sense to try reading its stdout/err
1841  cmsScimarkLaunch_pslist[core]=subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
1842  ActualLogfile.write("Spawned %s with PID %s\n"%(command,cmsScimarkLaunch_pslist[core].pid))
1843  ActualLogfile.flush()
1844  PerfSuiteArgs['runonspare']=False #Set it to false to avoid cmsScimark being spawned by each thread
1845  logfile=PerfSuiteArgs['logfile']
1846  suitethread={}
1847  for cpu in cpus:
1848  #Make arguments "threaded" by setting for each instance of the suite:
1849  #1-A different output (sub)directory
1850  #2-Only 1 core on which to run
1851  #3-Automatically have a logfile... otherwise stdout is lost?
1852  #To be done: [4-A flag for Valgrind not to "thread" itself onto the other cores..]
1853  cpudir = os.path.join(outputdir,"cpu_%s" % cpu)
1854  if not os.path.exists(cpudir):
1855  os.mkdir(cpudir)
1856  PerfSuiteArgs['perfsuitedir']=cpudir
1857  PerfSuiteArgs['cpus']=[cpu] #Keeping the name cpus for now FIXME: change it to cpu in the whole code
1858  if PerfSuiteArgs['logfile']:
1859  PerfSuiteArgs['logfile']=os.path.join(cpudir,os.path.basename(PerfSuiteArgs['logfile']))
1860  else:
1861  PerfSuiteArgs['logfile']=os.path.join(cpudir,"cmsPerfSuiteThread.log")
1862  #Now spawn the thread with:
1863  suitethread[cpu]=PerfThread(**PerfSuiteArgs)
1864  #ActualLogfile.write(suitethread[cpu])
1865  ActualLogfile.write("Launching PerfSuite thread on cpu%s\n"%cpu)
1866  ActualLogfile.flush()
1867  #print "With arguments:"
1868  #print PerfSuiteArgs
1869  suitethread[cpu].start()
1870 
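 #The loop below simply polls every 5 seconds until none of the per-cpu PerfThread instances
 #is still alive, i.e. until all threaded copies of the performance suite have finished.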
1871  while any(t.isAlive() for t in suitethread.values()):
1872  try:
1873  time.sleep(5.0)
1874  sys.stdout.flush()
1875  except (KeyboardInterrupt, SystemExit):
1876  raise
1877  ActualLogfile.write("All PerfSuite threads have completed!\n")
1878  ActualLogfile.flush()
1879 
1880  else: #No threading, just run the performance suite on the cpu core selected
1881  suite.runPerfSuite(**PerfSuiteArgs)
1882 
1883 if __name__ == "__main__":
1884 
1885  main(sys.argv)