CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch9/src/Validation/Performance/scripts/cmsPerfClient.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 import socket, xml, xmlrpclib, os, sys, threading, Queue, time, random, pickle, exceptions
00003 import optparse as opt
00004 #Documentation needs to follow... but for now just know that
00005 #a template file for cmsPerfClient.py -f option is BencmarkCfg.py in Validation/Performance/python dir.
# Name this script was invoked under; used in the option parser usage line.
PROG_NAME = os.path.split(sys.argv[0])[1]

# Every keyword that may appear in a dictionary of the configuration file
# (the order is kept exactly as it has always been).
validPerfSuitKeys = [
    "castordir",
    "perfsuitedir",
    "TimeSizeEvents",
    "TimeSizeCandles",
    "IgProfEvents",
    "IgProfCandles",
    "CallgrindEvents",
    "CallgrindCandles",
    "MemcheckEvents",
    "MemcheckCandles",
    "cmsScimark",
    "cmsScimarkLarge",
    "cmsdriverOptions",
    "stepOptions",
    "quicktest",
    "profilers",
    "cpus",
    "cores",
    "prevrel",
    "isAllCandles",
    "candles",
    "bypasshlt",
    "runonspare",
    "logfile",
]
00011 #################
00012 #
00013 # Option parser
00014 # returns : Command set to run on each (or all) machines, port to connect to server, 
00015 #           List of machines to connect to, File to pickle results to,
00016 #           Dictionary to index which command set to use for which machine
def optionparse():
    """Parse and validate the command line of the benchmarking client.

    Every validation failure is reported through ``parser.error()``, which
    prints the usage message and terminates the program, so no explicit
    ``sys.exit()`` is needed after it.

    Returns:
        A tuple ``(cmsperf_cmds, port, machines, outfile, cmdindex)`` where
        ``cmsperf_cmds`` is a list holding, per configuration file, the list
        of keyword dictionaries it defines; ``port`` is the port to connect
        to (8000 unless -p is given); ``machines`` is the list of stripped
        host names; ``outfile`` is the absolute path the results get pickled
        to; ``cmdindex`` maps each machine to the index in ``cmsperf_cmds``
        of the command set it has to run.
    """

    # Exact type every known configuration key must have. Exact type
    # comparison is deliberate: a bool must not pass for an int, and
    # "cpus"/"candles" have to be strings, not lists.
    expected_types = {
        "cpus": str,
        "cores": str,
        "castordir": str,
        "perfsuitedir": str,
        "TimeSizeEvents": int,
        "TimeSizeCandles": str,
        "CallgrindEvents": int,
        "CallgrindCandles": str,
        "IgProfEvents": int,
        "IgProfCandles": str,
        "MemcheckEvents": int,
        "MemcheckCandles": str,
        "cmsScimark": int,
        "cmsScimarkLarge": int,
        "cmsdriverOptions": str,
        "stepOptions": str,
        "quicktest": bool,
        "profilers": str,
        "prevrel": str,
        "isAllCandles": bool,
        "candles": str,
        "bypasshlt": bool,
        "runonspare": bool,
        "logfile": str,
    }

    def _isValidPerfCmdsDef(alist):
        """Return True if alist holds only dicts with valid keys and types."""
        for item in alist:
            if type(item) != type({}):
                return False
            for key in item:
                if key not in validPerfSuitKeys:
                    return False
                if key in expected_types and type(item[key]) is not expected_types[key]:
                    return False
        return True

    parser = opt.OptionParser(usage=("""%s [Options]""" % PROG_NAME))

    parser.add_option('-p',
                      '--port',
                      type="int",
                      dest='port',
                      default=-1,
                      help='Connect to server on a particular port',
                      metavar='<PORT>',
                      )

    parser.add_option('-o',
                      '--output',
                      type="string",
                      dest='outfile',
                      default="",
                      help='File to output data to',
                      metavar='<FILE>',
                      )

    parser.add_option('-m',
                      '--machines',
                      type="string",
                      action="append",
                      dest='machines',
                      default=[],
                      help='Machines to run the benchmarking on, for each machine add another one of these options',
                      metavar='<MACHINES>',
                      )

    parser.add_option('-f',
                      '--cmd-file',
                      type="string",
                      dest='cmscmdfile',
                      action="append",
                      default=[],
                      help='A files of cmsPerfSuite.py commands to execute on the machines, if more than one of these options is passed and the number of these options is the same as the number of machines, the x-th machine will use the x-th config file.',
                      metavar='<PATH>',
                      )

    (options, args) = parser.parse_args()

    ######################
    # Check output file location
    #
    if options.outfile:
        outfile = os.path.abspath(options.outfile)
        outdir = os.path.dirname(outfile)
        if not os.path.isdir(outdir):
            parser.error("ERROR: %s is not a valid directory to create %s" % (outdir, os.path.basename(outfile)))
    else:
        outfile = os.path.join(os.getcwd(), "cmsmultiperfdata.pypickle")

    # Never overwrite the results of a previous run
    if os.path.exists(outfile):
        parser.error("ERROR: outfile %s already exists" % outfile)

    ###############
    # Check configuration files for errors
    #
    cmscmdfiles = options.cmscmdfile
    if not cmscmdfiles:
        parser.error("A valid python file defining a list of dictionaries that represents a list of cmsPerfSuite keyword arguments must be passed to this program")

    missing_list_msg = "ERROR: %s must contain a list (variable named listperfsuitekeywords) of dictionaries that represents a list of cmsPerfSuite keyword arguments must be passed to this program"
    cmsperf_cmds = []
    for cmscmdfile in cmscmdfiles:
        cmdfile = os.path.abspath(cmscmdfile)
        print(cmdfile)
        if not os.path.isfile(cmdfile):
            parser.error("ERROR: %s is not a file" % cmdfile)

        # Run the configuration file in its own namespace: execfile() cannot
        # be relied upon to create variables in the calling function's scope.
        # NOTE: this executes arbitrary Python, only pass trusted files to -f.
        namespace = {}
        try:
            execfile(cmdfile, namespace)
        except SyntaxError:
            parser.error("ERROR: %s must be a valid python file" % cmdfile)
        except NameError as detail:
            parser.error((missing_list_msg + ": %s") % (cmdfile, str(detail)))

        if "listperfsuitekeywords" not in namespace:
            parser.error((missing_list_msg + ": %s") % (cmdfile, "name 'listperfsuitekeywords' is not defined"))

        perfcmds = namespace["listperfsuitekeywords"]
        if type(perfcmds) != type([]):
            parser.error((missing_list_msg + " 2") % cmdfile)
        if not _isValidPerfCmdsDef(perfcmds):
            parser.error((missing_list_msg + " 3") % cmdfile)
        cmsperf_cmds.append(perfcmds)

    ########
    # Setup port number (-1 means "not given", fall back to 8000)
    #
    if options.port == -1:
        port = 8000
    else:
        port = options.port

    #################
    # Check machine hostnames
    #
    if not options.machines:
        parser.error("you must specify at least one machine to benchmark")
    machines = [name.strip() for name in options.machines]

    for machine in machines:
        try:
            socket.getaddrinfo(machine, port)
        except socket.gaierror:
            parser.error("ERROR: Can not resolve machine address %s (must be ip{4,6} or hostname)" % machine)

    ##############
    # Define which configuration file to use for which machine
    # If only one configuration file is used then it is used for all machines
    cmdindex = {}  # machine -> index in cmsperf_cmds of the commands it runs
    if len(cmsperf_cmds) == 1:
        # all machines run the same set of commands
        for machine in machines:
            cmdindex[machine] = 0
    else:
        if len(cmsperf_cmds) != len(machines):
            parser.error("if more than one configuration file was specified you must specify a configuration file for each machine.")
        # the i-th machine runs the i-th configuration file passed as an option
        for i, machine in enumerate(machines):
            cmdindex[machine] = i

    return (cmsperf_cmds, port, machines, outfile, cmdindex)
00222 
00223 #################
00224 # Request benchmark
00225 # Connects to server and returns data
00226 # returns: profiling data from server
00227 #
def request_benchmark(perfcmds, shost, sport):
    """Submit a command set to a benchmarking server over XML-RPC.

    Args:
        perfcmds: the command set handed to the server's
            ``request_benchmark`` remote method.
        shost: host name or address of the server.
        sport: port the server listens on.

    Returns:
        Whatever the remote ``request_benchmark`` call returns, or None when
        one of the handled errors occurred (the error is printed to stdout).
    """
    try:
        # NOTE(review): the URL scheme is https - confirm the server side
        # really serves XML-RPC over TLS.
        server = xmlrpclib.ServerProxy("https://%s:%s" % (shost, sport))
        return server.request_benchmark(perfcmds)
    except socket.error as detail:
        print("ERROR: Could not communicate with server %s:%s: %s" % (shost, sport, detail))
    except xml.parsers.expat.ExpatError as detail:
        print("ERROR: XML-RPC could not be parsed: %s" % (detail,))
    except xmlrpclib.ProtocolError as detail:
        print("ERROR: XML-RPC protocol error %s try using -L xxx:localhost:xxx if using ssh to forward" % (detail,))
    except xmlrpclib.Fault as detail:
        # A Fault is how XML-RPC reports an error raised on the server side.
        # (The previous handler named the ``exceptions`` module itself, which
        # can never match a raised exception.)
        print("ERROR: There was a runtime error thrown by server %s; detail follows." % shost)
        print(detail)
    return None
00241 
00242 #################
00243 # Worker
00244 # This is a subclass of thread that submits commands to the server and stores the result in a thread-safe queue
00245 # 
class Worker(threading.Thread):
    """Thread that sends one command set to one server and queues the answer.

    The result is stored in the queue given at construction time as a
    ``(host, data)`` tuple.
    """

    def __init__(self, host, port, perfcmds, queue):
        threading.Thread.__init__(self)
        self.__host = host
        self.__port = port
        self.__perfcmds = perfcmds
        self.__queue = queue

    def run(self):
        """Request the benchmark and put ``(host, data)`` in the queue."""
        host = self.__host
        try:
            data = request_benchmark(self.__perfcmds, host, self.__port)
            # Debugging
            print("data is %s" % data)
            print("Puttin it in the queue as (%s,%s)" % (host, data))
            self.__queue.put((host, data))
        except (exceptions.Exception, xmlrpclib.Fault) as detail:
            # Report the failure; nothing is queued for this host
            print(" ".join(["Exception was thrown when receiving/submitting job information to host", str(host), ". Exception information:"]))
            print(detail)
            sys.stdout.flush()
00266 
00267 ##########################
00268 # runclient
00269 # Creates a thread for each machine to profile and waits for all machines to return data (you might consider adding a timeout in the while loop)
00270 # If the client is killed for some reason or there is an exception, dump the data to a file before throwing the exception
def runclient(perfcmds, hosts, port, outfile, cmdindex):
    """Run the benchmark on every host and pickle the collected results.

    One Worker thread is started per host; the function then polls every two
    seconds until no worker is alive any more and finally dumps the queued
    results with presentBenchmarkData().

    Args:
        perfcmds: list of command sets, one per configuration file.
        hosts: machines to submit the jobs to.
        port: port the servers listen on.
        outfile: file the results are pickled to.
        cmdindex: maps each host to the index in ``perfcmds`` it has to run.

    Raises:
        Anything raised while waiting (KeyboardInterrupt and SystemExit
        included) is re-raised after the data received so far was dumped.
    """
    queue = Queue.Queue()
    # start all threads
    workers = []
    for host in hosts:
        print("Submitting jobs to %s..." % host)
        w = Worker(host, port, perfcmds[cmdindex[host]], queue)
        w.start()
        workers.append(w)
    print("All jobs submitted, waiting for results...")
    sys.stdout.flush()
    # run until all servers have returned data; any() is also safe when
    # there are no workers at all
    try:
        while any(w.isAlive() for w in workers):
            time.sleep(2.0)
            sys.stdout.flush()
    except BaseException:
        # cleanup: save whatever was received before giving up. The try
        # covers the whole loop so an interrupt that arrives while the
        # workers are being polled does not lose the data either.
        presentBenchmarkData(queue, outfile)
        raise
    print("All job results received")
    print("The size with the queue containing all data is: %s " % queue.qsize())
    presentBenchmarkData(queue, outfile)
00298 
00299 ########################################
00300 #
00301 # Format of the returned data from remote host should be of the form (this could be cleaned up a little bit)
00302 # 
00303 # list of command outputs [ dictionary of cpus {   }  ]
00304 #
00305 # For example:
00306 # returned data     = [ cmd_output1, cmd_output2 ... ]
00307 # cmd_output1       = { cpuid1 : cpu_output1, cpuid2 : cpu_output2 ... }     # cpuid is "None" if there was only one cpu used
00308 # cpu_output1       = { candle1  : profset_output1, candle2 : profset_output2 ... }
00309 # profset_output1   = { profset1 : profile_output1, ... }
00310 # profile_output1   = { profiletype1: step_output1, ... }
00311 # step_output1      = { step1: list_of_cpu_times, ... }
00312 # list_of_cpu_times = [ (evt_num1, secs1), ... ]
00313 
00314 ###########
00315 #
00316 # We now massage the data
00317 #
def presentBenchmarkData(q, outfile):
    """Drain the result queue and pickle its content to a file.

    Args:
        q: queue holding ``(host, data)`` tuples; it is emptied by this call.
        outfile: path of the file to write, opened in binary write mode.

    The pickled object is the list of ``(host, data)`` tuples in the order
    they were taken from the queue.
    """
    print("Pickling data to file %s" % outfile)
    out = []            # match up the commands with each
                        # command that was passed in the config file
    while not q.empty():
        print("Queue size is still %s" % q.qsize())
        (host, data) = q.get()
        out.append((host, data))
    print("Dumping at screen the output!\n%s" % out)
    # the with block closes the file even when pickling fails
    with open(outfile, "wb") as oh:
        pickle.dump(out, oh)
00330 
def _main():
    """Script entry point: parse the options, then run the client."""
    parsed = optionparse()
    cmsperf_cmds, port, hosts, outfile, cmdindex = parsed
    runclient(perfcmds=cmsperf_cmds,
              hosts=hosts,
              port=port,
              outfile=outfile,
              cmdindex=cmdindex)


# Only run the client when executed as a script, not when imported.
if __name__ == "__main__":
    _main()