CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/PhysicsTools/Utilities/scripts/edmPickEvents.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 # Anzar Afaq         June 17, 2008
00004 # Oleksiy Atramentov June 21, 2008
00005 # Charles Plager     Sept  7, 2010
00006 
00007 import os
00008 import sys
00009 import optparse
00010 import re
00011 import commands
00012 import xml.sax
00013 import xml.sax.handler
00014 from FWCore.PythonUtilities.LumiList   import LumiList
00015 from xml.sax import SAXParseException
00016 from DBSAPI.dbsException import *
00017 from DBSAPI.dbsApiException import *
00018 from DBSAPI.dbsOptions import DbsOptionParser
00019 from DBSAPI.dbsApi import DbsApi
00020 from pprint import pprint
00021 
00022 
# Long-form usage text for this script.
# NOTE(review): this name shadows the builtin ``help`` and is not referenced
# by the code visible in this file (optparse builds its own usage message);
# it is kept under the same name for backward compatibility.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or - 

edmPickEvents.py dataset listOfEvents.txt


listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum   event
46968   2      4
47011 105     23
47011 140  12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just the name of the physics dataset, if you don't know exact name
    you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents 
"""
00054 
00055 
00056 ########################
00057 ## Event helper class ##
00058 ########################
00059 
class Event (dict):
    """Dictionary describing one event to pick.

    Holds the keys 'run', 'lumi' and 'event' (integers parsed from the input
    text) and 'dataset' (copied from the class attribute ``Event.dataset``
    when the object is built).  Every key can also be read as an attribute,
    e.g. ``event.run``.
    """

    # Dataset name shared by every Event; must be set before constructing one.
    dataset = None
    # Fields may be separated by any mix of whitespace, colons and commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` (e.g. "run:lumi:event" or "run lumi event").

        Extra keyword arguments are accepted and ignored.

        Raises:
            RuntimeError: if ``line`` does not start with three integer
                fields, or if ``Event.dataset`` has not been set.
        """
        text = line.strip()
        pieces = Event.splitRE.split (text)
        try:
            self['run']   = int (pieces[0])
            self['lumi']  = int (pieces[1])
            self['event'] = int (pieces[2])
        except (IndexError, ValueError):
            # Too few fields or a non-integer field; anything else is a
            # genuine bug and is allowed to propagate unchanged.
            raise RuntimeError ("Can not parse '%s' as Event object" % text)
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'.  Aborting." % text)
            raise RuntimeError ('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        Raises AttributeError (not KeyError) for unknown names so that
        ``hasattr``, ``copy`` and ``pickle`` behave normally.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError (key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s"  % self
00083 
00084 
00085 ######################
00086 ## XML parser class ##
00087 ######################
00088 
class Handler (xml.sax.handler.ContentHandler):
    """SAX content handler that collects the text of every <file> element.

    After parsing, ``files`` holds one string per non-empty <file> element,
    in document order.
    NOTE(review): assumes each <file> element carries a single file name as
    its text content -- confirm against the actual DBS response format.
    """

    def __init__(self):
        # ContentHandler is an old-style class on Python 2, so the base
        # initialiser is called explicitly rather than through super().
        xml.sax.handler.ContentHandler.__init__ (self)
        self.inFile = 0      # non-zero while inside a <file> element
        self.files = []      # completed file names
        self._chunks = []    # text pieces of the <file> being read

    def startElement(self, name, attrs):
        """Begin collecting text when a <file> element opens."""
        if name == 'file':
            self.inFile = 1
            self._chunks = []

    def endElement(self, name):
        """Store the collected text when a <file> element closes."""
        if name == 'file':
            self.inFile = 0
            if self._chunks:
                self.files.append (str (''.join (self._chunks)))
            self._chunks = []

    def characters(self, data):
        """Buffer character data found inside a <file> element.

        A SAX parser may deliver one text node in several calls, so the
        pieces are joined in endElement() instead of being stored one by one.
        """
        if self.inFile:
            self._chunks.append (data)
00106     
00107 
00108 #################
00109 ## Subroutines ##
00110 #################
00111 
def getFileNames (event, dbsOptions = None):
    """Ask DBS which files hold the run and lumi section of ``event``.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' keys (an Event).
        dbsOptions: options passed to the DbsApi constructor; an empty
            dictionary is used when omitted.

    Returns:
        The list of strings collected by Handler from the <file> elements
        of the DBS XML response.

    Raises:
        DbsApiException: re-raised, after its details are printed, when the
            DBS query fails.  (Previously execution continued and died with
            a NameError because no response had been stored.)
        DbsBadXMLData: when the response is not parseable XML.
    """
    if dbsOptions is None:
        dbsOptions = {}

    # Query DBS
    query = "find file where dataset=%(dataset)s and run=%(run)i and lumi=%(lumi)i" % event
    try:
        api = DbsApi (dbsOptions)
        xmldata = api.executeQuery(query)
    except DbsApiException as ex:
        print("Caught API Exception %s: %s "  % (ex.getClassName(), ex.getErrorMessage() ))
        if ex.getErrorCode() not in (None, ""):
            print("DBS Exception Error Code:  %s" % ex.getErrorCode())
        # Without a response there is nothing to parse: let the caller see
        # the real DBS failure.
        raise

    # Parse the resulting xml output.
    handler = Handler()
    try:
        xml.sax.parseString (xmldata, handler)
    except SAXParseException:
        msg = "Unable to parse XML response from DBS Server"
        msg += "\n  Server has not responded as desired, try setting level=DBSDEBUG"
        raise DbsBadXMLData(args=msg, code="5999")

    return handler.files
00135 
00136 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked for under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE; the first path that exists is returned.

    Raises:
        RuntimeError: if CMSSW_BASE is unset or empty, or if the file is
            found in neither area.
    """
    relativePath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    localArea = os.environ.get ('CMSSW_BASE')
    if not localArea:
        raise RuntimeError ("CMSSW Environment not set")
    # The user's working area takes precedence over the release area.
    for area in (localArea, os.environ.get ('CMSSW_RELEASE_BASE')):
        candidate = "%s/%s" % (area, relativePath)
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError ("Could not find copyPickMerge_cfg.py")
00151 
def guessEmail():
    """Build a default e-mail address of the form user@domain.

    The user part is the output of the ``whoami`` command and the domain is
    made of the last two dot-separated components of the output of the
    ``hostname`` command.
    """
    user = commands.getoutput ('whoami')
    hostParts = commands.getoutput ('hostname').split ('.')
    domain = '.'.join (hostParts[-2:])
    return '%s@%s' % (user, domain)
00155 
00156 
def setupCrabDict (options):
    """Build the substitution dictionary used to fill crabTemplate.

    Args:
        options: parsed command-line options; the attributes ``base``,
            ``email`` and ``crabCondor`` are read.

    Returns:
        A dict with the keys 'runEvent', 'copyPickMerge', 'output',
        'crabcfg', 'json', 'dataset', 'email', 'scheduler' and 'useServer'.
    """
    prefix = options.base
    crab = {
        'runEvent'      : '%s_runEvents.txt' % prefix,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % prefix,
        'crabcfg'       : '%s_crab.config' % prefix,
        'json'          : '%s.json' % prefix,
        'dataset'       : Event.dataset,
        'email'         : options.email,
    }
    # Condor jobs do not go through the CRAB server; glite jobs do.
    if options.crabCondor:
        crab.update (scheduler = 'condor', useServer = '')
    else:
        crab.update (scheduler = 'glite',
                     useServer = 'use_server              = 1')
    return crab
00174 
00175 
# CRAB configuration template.  It is filled with the dictionary returned by
# setupCrabDict(); the placeholders used are crabcfg, runEvent, output, json,
# copyPickMerge, dataset, email, scheduler and useServer.
crabTemplate = '''
# CRAB documentation:
# https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrab
#
# Once you are happy with this file, please run
# crab -create -cfg %(crabcfg)s
# crab -submit -cfg %(crabcfg)s

[CMSSW]
pycfg_params = eventsToProcess_load=%(runEvent)s outputFile=%(output)s

lumi_mask               = %(json)s
total_number_of_lumis   = -1
lumis_per_job           = 1
pset                    = %(copyPickMerge)s
datasetpath             = %(dataset)s
output_file             = %(output)s

[USER]
return_data             = 1
email                   = %(email)s

# if you want to copy the data or put it in a storage element, do it
# here.


[CRAB]
# use "glite" in general; you can use "condor" if you run on CAF at FNAL or
# an OSG site AND you know the files are available locally
scheduler               = %(scheduler)s  
jobtype                 = cmssw
%(useServer)s
'''
00210 
00211 
00212 ########################
00213 ## ################## ##
00214 ## ## Main Program ## ##
00215 ## ################## ##
00216 ########################
00217 
if __name__ == "__main__":
    # Script entry point: parse the command line, build the list of events
    # to pick, then either write the CRAB configuration files or print (and
    # optionally run) an edmCopyPickMerge command.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program facilitates picking specific events from a data set.  For full details, please visit https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible.  Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()

    # Both a dataset and at least one event (or an event file) are required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # The first positional argument is the dataset; it is shared by every
    # Event built below.
    Event.dataset = args.pop(0)
    # '.*' (not '.+') so that a lone '#' is also stripped as a comment.
    commentRE = re.compile (r'#.*$')
    colonRE   = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError ("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open (args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # Blank or comment-only line: nothing to parse and
                    # nothing worth reporting.
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append (event)

    # Long event lists are always handed over to CRAB.
    if len (eventList) > 20:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError ("This job is can not be run interactive, but rather by crab.  Please call without '--runInteractive' flag.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        # 'with' guarantees both files are flushed and closed (the crab
        # config file was previously never explicitly closed).
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor.  Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file.  Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB.  Then edit %(crabcfg)s to make any desired changed.  The run:\n\ncrab -create -cfg %(crabcfg)s\ncrab -submit\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        # Check for an empty list before doing any DBS work.
        if not eventList:
            print("No events defind.  Aborting.")
            sys.exit()
        files = []
        for event in eventList:
            files.extend( getFileNames (event) )
        # Purge duplicate files, keeping the first occurrence of each
        seen = set()
        uniqueFiles = []
        for filename in files:
            if filename in seen:
                continue
            seen.add (filename)
            uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n  eventsToProcess=%s \\\n  inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        # The command is always printed; it is executed only on request.
        if options.runInteractive and not options.printInteractive:
            os.system (command)
00321         command = 'edmCopyPickMerge outputFile=%s.root \\\n  eventsToProcess=%s \\\n  inputFiles=%s' \
00322                   % (options.base, eventsToProcess, source)
00323         print "\n%s" % command
00324         if options.runInteractive and not options.printInteractive:
00325             os.system (command)