00001
00002
00003
00004
00005
00006
00007 import os
00008 import sys
00009 import optparse
00010 import re
00011 import commands
00012 import xml.sax
00013 import xml.sax.handler
00014 from FWCore.PythonUtilities.LumiList import LumiList
00015 from xml.sax import SAXParseException
00016 from DBSAPI.dbsException import *
00017 from DBSAPI.dbsApiException import *
00018 from DBSAPI.dbsOptions import DbsOptionParser
00019 from DBSAPI.dbsApi import DbsApi
00020 from pprint import pprint
00021
00022
# Usage text for this script: it describes the two invocation forms (events
# given directly as run:lumi:event arguments, or a text file with one
# "run lumi event" triple per line and '#' comments) and the dataset argument.
# NOTE(review): this name shadows the builtin help(), and it is not referenced
# by the option parser in the visible part of the file -- confirm whether it
# is still used anywhere before renaming or removing it.
00023 help = """
00024 How to use:
00025
00026 edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2
00027
00028 - or -
00029
00030 edmPickEvent.py dataset listOfEvents.txt
00031
00032
00033 listOfEvents is a text file:
00034 # this line is ignored as a comment
00035 # since '#' is a valid comment character
00036 run1 lumi_section1 event1
00037 run2 lumi_section2 event2
00038
00039 For example:
00040 # run lum event
00041 46968 2 4
00042 47011 105 23
00043 47011 140 12312
00044
00045 run, lumi_section, and event are integers that you can get from
00046 edm::Event(Auxiliary)
00047
00048 dataset: it just a name of the physics dataset, if you don't know exact name
00049 you can provide a mask, e.g.: *QCD*RAW
00050
00051 For updated information see Wiki:
00052 https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents
00053 """
00054
00055
00056
00057
00058
00059
class Event (dict):
    """A single event to pick, stored as a dict.

    Keys are 'run', 'lumi' and 'event' (integers parsed from the input
    line) plus 'dataset' (copied from the class attribute ``dataset`` at
    construction time).  Every key can also be read as an attribute,
    e.g. ``evt.run``.
    """

    # Dataset name assigned to every new Event; it must be set on the class
    # before any instance is created.
    dataset = None
    # The three numbers may be separated by whitespace, colons or commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse `line` ("run lumi event", any mix of the separators above).

        Raises RuntimeError if the line does not start with three integers
        or if no dataset has been set on the class.  `kwargs` is accepted
        for compatibility and ignored.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run']   = int( pieces[0] )
            self['lumi']  = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.  Only these
            # are caught so that KeyboardInterrupt and friends are not
            # silently turned into a parse error.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip())
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose the dict entries as attributes.

        Unknown names raise AttributeError (not KeyError) so that hasattr(),
        getattr() with a default, and the copy module behave normally.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
00083
00084
00085
00086
00087
00088
class Handler (xml.sax.handler.ContentHandler):
    """SAX handler that collects the text of every <file> element.

    After parsing, ``files`` holds one string per non-empty <file> element,
    in document order.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.inFile = 0      # non-zero while inside a <file> element
        self.files = []      # collected file names
        self._chunks = []    # text pieces of the <file> currently open

    def startElement(self, name, attrs):
        if name == 'file':
            self.inFile = 1
            self._chunks = []

    def endElement(self, name):
        if name == 'file':
            self.inFile = 0
            # A parser may deliver one text node in several characters()
            # calls, so the name is only complete once the element closes.
            # Surrounding whitespace is dropped and empty elements skipped.
            text = ''.join(self._chunks).strip()
            self._chunks = []
            if text:
                self.files.append(str(text))

    def characters(self, data):
        if self.inFile:
            self._chunks.append(data)
00106
00107
00108
00109
00110
00111
def getFileNames (event, dbsOptions = None):
    """Return the file names DBS reports for the event's dataset/run/lumi.

    event      -- mapping with 'dataset', 'run' and 'lumi' entries (an Event)
    dbsOptions -- options passed to the DbsApi constructor (default: empty
                  dict)

    Returns the list of strings collected by Handler from the XML answer.
    If the DBS query itself fails, the error is printed and an empty list is
    returned.  Raises DbsBadXMLData when the answer cannot be parsed as XML.
    """
    if dbsOptions is None:
        # a fresh dict per call instead of one shared mutable default
        dbsOptions = {}
    try:
        api = DbsApi (dbsOptions)
        query = "find file where dataset=%(dataset)s and run=%(run)i and lumi=%(lumi)i" % event
        xmldata = api.executeQuery(query)
    except DbsApiException as ex:
        print("Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ))
        if ex.getErrorCode() not in (None, ""):
            print("DBS Exception Error Code:  %s" % ex.getErrorCode())
        # There is no answer to parse; previously execution fell through and
        # died with a NameError on the unbound 'xmldata'.
        return []

    handler = Handler()
    try:
        xml.sax.parseString (xmldata, handler)
    except SAXParseException:
        msg = "Unable to parse XML response from DBS Server"
        msg += "\n Server has not responded as desired, try setting level=DBSDEBUG"
        raise DbsBadXMLData(args=msg, code="5999")

    return handler.files
00135
00136
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked for under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE; the first existing path is returned.

    Raises RuntimeError if CMSSW_BASE is not set, or if the file exists in
    neither area.
    """
    if not os.environ.get ('CMSSW_BASE'):
        raise RuntimeError("CMSSW Environment not set")
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get (variable)
        if not base:
            # An unset variable must not be formatted into the path
            # (that would probe a relative "None/src/..." location).
            continue
        retval = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                 % base
        if os.path.exists (retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
00151
def guessEmail():
    """Build a default e-mail address of the form user@domain.

    The user part is the output of the `whoami` command; the domain part is
    made of the last two dot-separated components of the `hostname` output.
    """
    user = commands.getoutput ('whoami')
    hostParts = commands.getoutput ('hostname').split ('.')
    domain = '.'.join (hostParts[-2:])
    return '%s@%s' % (user, domain)
00155
00156
def setupCrabDict (options):
    """Collect the values substituted into the CRAB config template.

    options -- parsed command-line options; `base`, `email` and `crabCondor`
               are read.

    Returns a dict with the output file names derived from options.base, the
    path from fullCPMpath(), the dataset taken from Event.dataset, the
    e-mail address, and the scheduler / use_server settings.
    """
    prefix = options.base
    settings = {
        'runEvent'      : '%s_runEvents.txt' % prefix,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % prefix,
        'crabcfg'       : '%s_crab.config' % prefix,
        'json'          : '%s.json' % prefix,
        'dataset'       : Event.dataset,
        'email'         : options.email,
    }
    # glite goes through the CRAB server; condor does not get that line
    scheduler, serverLine = 'glite', 'use_server = 1'
    if options.crabCondor:
        scheduler, serverLine = 'condor', ''
    settings['scheduler'] = scheduler
    settings['useServer'] = serverLine
    return settings
00174
00175
00176
# %-style template for the CRAB configuration file.  The script fills it with
# "crabTemplate % crabDict", where the dict comes from setupCrabDict(); the
# keys used below are crabcfg, runEvent, output, json, copyPickMerge, dataset,
# email, scheduler and useServer.
00177 crabTemplate = '''
00178 # CRAB documentation:
00179 # https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrab
00180 #
00181 # Once you are happy with this file, please run
00182 # crab -create -cfg %(crabcfg)s
00183 # crab -submit -cfg %(crabcfg)s
00184
00185 [CMSSW]
00186 pycfg_params = eventsToProcess_load=%(runEvent)s outputFile=%(output)s
00187
00188 lumi_mask = %(json)s
00189 total_number_of_lumis = -1
00190 lumis_per_job = 1
00191 pset = %(copyPickMerge)s
00192 datasetpath = %(dataset)s
00193 output_file = %(output)s
00194
00195 [USER]
00196 return_data = 1
00197 email = %(email)s
00198
00199 # if you want to copy the data or put it in a storage element, do it
00200 # here.
00201
00202
00203 [CRAB]
00204 # use "glite" in general; you can "condor" if you run on CAF at FNAL or USG
00205 # site AND you know the files are available locally
00206 scheduler = %(scheduler)s
00207 jobtype = cmssw
00208 %(useServer)s
00209 '''
00210
00211
00212
00213
00214
00215
00216
00217
if __name__ == "__main__":
    # Command-line driver.  Steps:
    #   1. parse the options and the positional arguments (dataset + events),
    #   2. build the list of Event objects from the arguments or from a file,
    #   3. either write the CRAB set-up files (forced with --crab, or
    #      automatically for more than 20 events), or look up the input files
    #      and print / run the edmCopyPickMerge command.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program facilitates picking specific events from a data set. For full details, please visit https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()

    # at least the dataset and one event (or an event file) are required
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    # '#' starts a comment that runs to the end of the line; '.*' (rather
    # than '.+') also removes a lone '#'.
    commentRE = re.compile (r'#.*$')
    colonRE   = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are given directly on the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # the single remaining argument names a file listing the events
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # blank and comment-only lines are ignored silently
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    # Checked before either mode does any work, so an empty event file
    # neither triggers DBS queries nor produces empty CRAB files.
    if not eventList:
        print("No events defind. Aborting.")
        sys.exit()

    if len (eventList) > 20:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job is can not be run interactive, but rather by crab. Please call without '--runInteractive' flag.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        lumiMask = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiMask.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        # 'with' guarantees the config file is closed; the former
        # "target.close" lacked the call parentheses and did nothing.
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB. Then edit %(crabcfg)s to make any desired changed. The run:\n\ncrab -create -cfg %(crabcfg)s\ncrab -submit\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        for event in eventList:
            files.extend( getFileNames (event) )
        # drop duplicates while keeping the order of first appearance
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)