CMS 3D CMS Logo

edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 import os
10 import sys
11 import optparse
12 import re
13 import commands
14 from FWCore.PythonUtilities.LumiList import LumiList
15 import json
16 from pprint import pprint
17 from datetime import datetime
18 import subprocess
19 import Utilities.General.cmssw_das_client as das_client
# Long-form usage text for this script: the two accepted invocation forms and
# the layout of the optional event-list text file.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just the name of the physics dataset; if you don't know the exact name
 you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
50 
51 
52 ########################
53 ## Event helper class ##
54 ########################
55 
class Event (dict):
    """Dictionary holding one event's 'run', 'lumi', 'event' and 'dataset'.

    The three numbers are parsed from a text line whose fields are separated
    by any mix of whitespace, ':' and ','.  The dataset is not parsed from
    the line; it is copied from the class attribute ``Event.dataset``, which
    must be assigned before any instance is created.

    Keys are also readable as attributes (``evt.run``).
    """

    # Dataset name shared by every Event; assigned by the caller before use.
    dataset = None
    # Field separator: runs of whitespace, colons and commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` into run/lumi/event.

        ``kwargs`` is accepted for interface compatibility and ignored.

        Raises:
            RuntimeError: if ``line`` does not start with three integer
                fields, or if ``Event.dataset`` is unset/empty.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run'] = int( pieces[0] )
            self['lumi'] = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError):
            # Too few fields or a non-integer field; catching only these
            # lets KeyboardInterrupt/SystemExit propagate untouched.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip())
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        Only called when normal attribute lookup fails.  Unknown names raise
        AttributeError (not KeyError) so that hasattr() and getattr() with a
        default behave as expected.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
79 
80 
81 #################
82 ## Subroutines ##
83 #################
84 
def getFileNames(event, client=None):
    """Return files for given DAS query.

    Args:
        event: mapping passed through unchanged to the selected helper.
        client: 'das_client' or 'dasgoclient' to force that back-end.  Any
            other value (including the default None and '') selects
            automatically: dasgoclient if an entry of PATH contains a file
            of that name, otherwise das_client.

    Returns:
        Whatever the selected helper returns (a list of file names).
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    elif client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # default action: prefer dasgoclient when it is installed.
    # A missing PATH variable is treated as empty instead of crashing on
    # None.split(); os.pathsep replaces the hard-coded ':'.
    for path in os.environ.get('PATH', '').split(os.pathsep):
        if os.path.isfile(os.path.join(path, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
96 
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client.

    Args:
        event: mapping providing 'dataset', 'run' and 'lumi' for the query.

    Returns:
        List of distinct, non-empty file names in the order received.  An
        empty list is returned (after printing the status) when the reply's
        'status' is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s" % (status))
        return []

    # The reply echoes the 'grep' filter; it is handed back to get_value()
    # to pull the selected field out of each data row.
    grepFilter = jsondict['mongo_query']['filters']['grep']

    files = []
    for row in jsondict['data']:
        values = list(das_client.get_value(row, grepFilter))
        if not values:
            # A row without any value used to raise IndexError on [0].
            continue
        fileName = values[0]
        if len(fileName) > 0 and fileName not in files:
            files.append(fileName)

    return files
119 
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient.

    Runs ``dasgoclient -query <query> -json`` and collects the 'name' of
    every record found under the 'file' key of each returned row.

    Args:
        event: mapping providing 'dataset', 'run' and 'lumi' for the query.

    Returns:
        List of file names.  If the command wrote anything to stderr, the
        text is printed and an empty list is returned.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes together.  Reading stderr to EOF first
    # can deadlock once the child blocks on a full stdout pipe.
    out, err = proc.communicate()
    files = []
    if err:
        print("DAS error: %s" % err)
        return files
    for row in json.loads(out):
        for rec in row.get('file', []):
            fname = rec.get('name', '')
            if fname:
                files.append(fname)
    return files
136 
def fullCPMpath():
    """Return the path of copyPickMerge_cfg.py.

    The file is looked up under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE; the first existing path is returned.

    Raises:
        RuntimeError: if CMSSW_BASE is unset/empty, or if the file exists
            under neither base directory.
    """
    base = os.environ.get ('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    relPath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    for root in (base, os.environ.get ('CMSSW_RELEASE_BASE')):
        if not root:
            # An unset CMSSW_RELEASE_BASE used to be formatted into a
            # meaningless "None/src/..." path; skip it instead.
            continue
        candidate = "%s/%s" % (root, relPath)
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
151 
def guessEmail():
    """Return a guessed e-mail address of the form '<user>@<domain>'.

    <user> is the current login name and <domain> is the last two
    dot-separated labels of the local host name (the whole name if it has
    fewer than two labels).

    The previous implementation shelled out to ``whoami`` and ``hostname``
    through the ``commands`` module, which does not exist in Python 3; the
    standard-library calls below need no subprocess.
    NOTE(review): getpass.getuser() consults LOGNAME/USER/LNAME/USERNAME
    before the password database, so it can differ from ``whoami`` if those
    variables were overridden.
    """
    # Imported locally so this function does not depend on the file header.
    import getpass
    import socket
    domain = '.'.join(socket.gethostname().split('.')[-2:])
    return '%s@%s' % (getpass.getuser(), domain)
155 
def setupCrabDict (options):
    """Build the substitution dictionary used to fill the CRAB template.

    Args:
        options: parsed command-line options; reads ``base``, ``email`` and
            ``crabCondor``.

    Returns:
        dict with the output file names derived from ``options.base``, the
        copyPickMerge configuration path, the dataset, the e-mail address,
        a timestamp work-area label, the scheduler name and an empty
        'useServer' entry.
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    return {
        'runEvent'      : '%s_runEvents.txt' % prefix,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % prefix,
        'crabcfg'       : '%s_crab.py' % prefix,
        'json'          : '%s.json' % prefix,
        'dataset'       : Event.dataset,
        'email'         : options.email,
        'WorkArea'      : stamp,
        # Condor when requested, remote glide-in otherwise.
        'scheduler'     : 'condor' if options.crabCondor else 'remoteGlidein',
        # Always empty, whichever scheduler is chosen.
        'useServer'     : '',
    }
176 
# Text of the generated CRAB 3 configuration file.  It is filled with the
# '%' operator from a dictionary that must provide the keys WorkArea,
# copyPickMerge, runEvent, output, dataset and json.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
221 
222 ########################
223 ## ################## ##
224 ## ## Main Program ## ##
225 ## ################## ##
226 ########################
227 
if __name__ == "__main__":
    # Script entry point.
    #  1. Parse options; the first positional argument is the dataset, the
    #     rest are either run:lumi:event triplets or one event-list file.
    #  2. Build the list of Event objects.
    #  3. Either write the CRAB input files (forced with --crab, or when
    #     there are more events than --maxEventsInteractive), or look up
    #     the files holding the events and print / run an edmCopyPickMerge
    #     command.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    das_cli = ''
    parser.add_option ('--das-client', dest='das_cli', type='string',
                       default=das_cli,
                       help="Specify das client to use (default '%s')" % das_cli )
    (options, args) = parser.parse_args()

    # Need at least a dataset and one event specification.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    commentRE = re.compile (r'#.+$')
    colonRE = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file; 'with' closes it even if reading fails
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                try:
                    event = Event (line)
                except RuntimeError:
                    # Unparsable lines (blank, comment-only, malformed)
                    # are reported and skipped.
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: fall back to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named lumiList (not 'json') so the imported json module is not
        # rebound at module level.
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        # The original wrote 'target.close' without parentheses here, so
        # the CRAB configuration file was never explicitly closed.
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # NOTE(review): only the literal ['[]'] reply is treated as
            # "not in dataset"; an empty list leaves the event selected
            # with no input file - confirm this is intended.
            if eventFiles == ['[]']: # event not contained in the input dataset
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Purge events
        for event in eventPurgeList:
            eventList.remove( event )
        # Purge duplicate files, keeping first-seen order
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        source = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print("\n%s" % command)
        # The command is always printed; it is executed only on request.
        if options.runInteractive and not options.printInteractive:
            os.system (command)
355 
def getFileNames_dasgoclient(event)
def getFileNames(event, client=None)
Subroutines ##.
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
Event helper class ##.
def __getattr__(self, key)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def getFileNames_das_client(event)
def __init__(self, line, kwargs)
def setupCrabDict(options)
double split
Definition: MVATrainer.cc:139