CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 
7 import os
8 import sys
9 import optparse
10 import re
11 import commands
12 import xml.sax
13 import xml.sax.handler
14 from FWCore.PythonUtilities.LumiList import LumiList
15 from xml.sax import SAXParseException
16 from DBSAPI.dbsException import *
17 from DBSAPI.dbsApiException import *
18 from DBSAPI.dbsOptions import DbsOptionParser
19 from DBSAPI.dbsApi import DbsApi
20 from pprint import pprint
21 
22 
# Long-form usage text for this script.
# NOTE: the name shadows the builtin ``help``; it is kept unchanged because it
# is part of this module's namespace.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt


listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just a name of the physics dataset, if you don't know exact name
 you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents
"""
54 
55 
56 ########################
57 ## Event helper class ##
58 ########################
59 
class Event (dict):
    """A single (run, lumi, event) triple tagged with the current dataset.

    The values are stored as dict items under the keys 'run', 'lumi',
    'event' and 'dataset', and are also readable as attributes
    (``ev.run``, ``ev.lumi``, ...).
    """

    # Dataset name copied into every new Event; it must be assigned on the
    # class before the first Event is constructed.
    dataset = None
    # Fields may be separated by any run of whitespace, ':' or ','.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` ("run lumi event", "run:lumi:event", ...).

        ``kwargs`` is accepted but not used.

        Raises RuntimeError when ``line`` does not start with three
        integer fields, or when ``Event.dataset`` has not been set.
        """
        text = line.strip()
        pieces = Event.splitRE.split (text)
        try:
            self['run']   = int( pieces[0] )
            self['lumi']  = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.
            raise RuntimeError("Can not parse '%s' as Event object" % text)
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % text)
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dict items as attributes.

        Only called when normal attribute lookup fails.  A missing key is
        reported as AttributeError (not KeyError) so that ``hasattr`` and
        ``getattr`` with a default behave as expected.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
83 
84 
85 ######################
86 ## XML parser class ##
87 ######################
88 
class Handler (xml.sax.handler.ContentHandler):
    """SAX content handler that collects the text of every <file> element.

    After parsing, ``files`` holds one string per non-empty <file> element,
    in document order.
    """

    def __init__(self):
        # Called explicitly rather than through super() so that it also
        # works if the base class is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.inFile = 0      # 1 while inside a <file> element, else 0
        self.files  = []     # completed file names
        self._chunks = []    # text pieces of the <file> currently open

    def startElement(self, name, attrs):
        """Start buffering text when a <file> element opens."""
        if name == 'file':
            self.inFile = 1
            self._chunks = []

    def endElement(self, name):
        """Store the buffered text as one entry when a <file> element closes."""
        if name == 'file':
            self.inFile = 0
            text = ''.join (self._chunks)
            self._chunks = []
            if text:
                self.files.append (text)

    def characters(self, data):
        """Buffer character data seen inside a <file> element.

        A SAX parser may deliver one text node in several calls, so the
        pieces are joined in endElement() instead of being appended to
        ``files`` one by one.
        """
        if self.inFile:
            self._chunks.append (str(data))
106 
107 
108 #################
109 ## Subroutines ##
110 #################
111 
def getFileNames (event, dbsOptions = None):
    """Ask DBS which files hold the run and lumi section of ``event``.

    event      -- mapping providing 'dataset', 'run' and 'lumi'.
    dbsOptions -- options passed to DbsApi; defaults to an empty dict.

    Returns the list of strings collected by Handler from the XML reply.
    If the DBS call raises DbsApiException the error is printed and an
    empty list is returned.

    Raises DbsBadXMLData when the reply can not be parsed as XML.
    """
    # A fresh dict per call: a mutable default would be shared between calls.
    if dbsOptions is None:
        dbsOptions = {}

    # Query DBS
    try:
        api = DbsApi (dbsOptions)
        query = "find file where dataset=%(dataset)s and run=%(run)i and lumi=%(lumi)i" % event
        xmldata = api.executeQuery(query)
    except DbsApiException as ex:
        print("Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ))
        if ex.getErrorCode() not in (None, ""):
            print("DBS Exception Error Code:  %s" % ex.getErrorCode())
        # There is no reply to parse; previously execution fell through
        # and failed with a NameError on the unbound 'xmldata'.
        return []

    # Parse the resulting xml output.
    handler = Handler()
    try:
        xml.sax.parseString (xmldata, handler)
    except SAXParseException:
        msg = "Unable to parse XML response from DBS Server"
        msg += "\n Server has not responded as desired, try setting level=DBSDEBUG"
        raise DbsBadXMLData(args=msg, code="5999")

    return handler.files
135 
136 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE.

    Raises RuntimeError if $CMSSW_BASE is not set, or if the file exists
    in neither area.
    """
    # The 'def' header is reconstructed from the zero-argument call
    # 'fullCPMpath()' made in setupCrabDict.
    base = os.environ.get ('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    for root in (base, os.environ.get ('CMSSW_RELEASE_BASE')):
        if not root:
            # An unset variable used to be formatted into the path as "None".
            continue
        retval = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                 % root
        if os.path.exists (retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
151 
def guessEmail():
    """Guess an e-mail address of the form '<user>@<domain>'.

    <user> is the login name of the effective user id and <domain> is the
    last two dot-separated components of the local host name.  The result
    is only a guess.
    """
    # The 'def' header is reconstructed from the zero-argument call
    # 'guessEmail()' made in the main program.
    # The user and host names are read in-process instead of running
    # 'whoami' and 'hostname' through the 'commands' module, which does not
    # exist in Python 3.
    import pwd
    import socket
    try:
        user = pwd.getpwuid (os.geteuid())[0]
    except KeyError:
        # The uid has no passwd entry: fall back to the environment.
        user = os.environ.get ('USER', '')
    domain = '.'.join (socket.gethostname().split ('.')[-2:])
    return '%s@%s' % (user, domain)
155 
156 
def setupCrabDict (options):
    """Build the substitution dictionary used to fill the CRAB template.

    options -- parsed command-line options; 'base', 'email' and
               'crabCondor' are read.

    Returns a dict with the keys 'runEvent', 'copyPickMerge', 'output',
    'crabcfg', 'json', 'dataset', 'email', 'scheduler' and 'useServer'.
    """
    prefix = options.base
    settings = dict (
        runEvent      = '%s_runEvents.txt' % prefix,
        copyPickMerge = fullCPMpath(),
        output        = '%s.root' % prefix,
        crabcfg       = '%s_crab.config' % prefix,
        json          = '%s.json' % prefix,
        dataset       = Event.dataset,
        email         = options.email,
    )
    # The scheduler choice also decides whether the 'use_server' line is
    # emitted.
    if options.crabCondor:
        settings.update (scheduler = 'condor', useServer = '')
    else:
        settings.update (scheduler = 'glite', useServer = 'use_server = 1')
    return settings
174 
175 
176 # crab template
# Text of the generated CRAB configuration file.  It is filled with the '%'
# operator from a dict providing the keys: crabcfg, runEvent, output, json,
# copyPickMerge, dataset, email, scheduler and useServer.
crabTemplate = '''
# CRAB documentation:
# https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrab
#
# Once you are happy with this file, please run
# crab -create -cfg %(crabcfg)s
# crab -submit -cfg %(crabcfg)s

[CMSSW]
pycfg_params = eventsToProcess_load=%(runEvent)s outputFile=%(output)s

lumi_mask = %(json)s
total_number_of_lumis = -1
lumis_per_job = 1
pset = %(copyPickMerge)s
datasetpath = %(dataset)s
output_file = %(output)s

[USER]
return_data = 1
email = %(email)s

# if you want to copy the data or put it in a storage element, do it
# here.


[CRAB]
# use "glite" in general; you can use "condor" if you run on CAF at FNAL or OSG
# site AND you know the files are available locally
scheduler = %(scheduler)s
jobtype = cmssw
%(useServer)s
'''
210 
211 
212 ########################
213 ## ################## ##
214 ## ## Main Program ## ##
215 ## ################## ##
216 ########################
217 
if __name__ == "__main__":
    # Command-line driver: read the dataset and the list of events, then
    # either write the files needed for a CRAB job or build (and
    # optionally run) an interactive edmCopyPickMerge command.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program facilitates picking specific events from a data set. For full details, please visit https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()

    # A dataset plus at least one event (or an event file) is required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    # '#' starts a comment; '.*' (not '.+') so that a lone '#' is stripped too.
    commentRE = re.compile (r'#.*$')
    eventList = []
    if len (args) > 1 or ':' in args[0]:
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open (args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line).strip()
                if not line:
                    # Blank and comment-only lines are valid input; do not
                    # report them as skipped.
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line)
                    continue
                eventList.append (event)

    # Long event lists are always handled through CRAB.
    if len (eventList) > 20:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job is can not be run interactive, but rather by crab. Please call without '--runInteractive' flag.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        # The context manager closes the file; the original 'target.close'
        # lacked parentheses and never closed it explicitly.
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB. Then edit %(crabcfg)s to make any desired changed. The run:\n\ncrab -create -cfg %(crabcfg)s\ncrab -submit\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        # Check before querying DBS: there is nothing to look up otherwise.
        if not eventList:
            print("No events defind. Aborting.")
            sys.exit()
        files = []
        for event in eventList:
            files.extend( getFileNames (event) )
        # Purge duplicate files, keeping the first occurrence of each.
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            # NOTE(review): the command is passed to a shell as one string
            # built from the command-line arguments and the DBS reply.
            os.system (command)
Event helper class ##.
def getFileNames
Subroutines ##.
XML parser class ##.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139