CMS 3D CMS Logo

edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 from __future__ import print_function
10 import os
11 import sys
12 import optparse
13 import re
14 
15 from FWCore.PythonUtilities.LumiList import LumiList
16 import json
17 from pprint import pprint
18 from datetime import datetime
19 import subprocess
20 import Utilities.General.cmssw_das_client as das_client
# Usage text for this script.
# NOTE: this name shadows the builtin ``help``; it is kept unchanged because
# it is part of the module's existing namespace.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just the name of the physics dataset; if you don't know the
         exact name you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
51 
52 
53 
56 
class Event (dict):
    """One (run, lumi, event) triple bound to the currently selected dataset.

    The values are stored as dictionary items under the keys ``'run'``,
    ``'lumi'``, ``'event'`` and ``'dataset'`` and are also readable as
    attributes (``ev.run`` etc.).
    """

    # Dataset name copied into every instance; it must be assigned on the
    # class before any Event is constructed.
    dataset = None
    # Fields may be separated by any run of whitespace, colons or commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` ("run lumi event", any supported separator).

        ``kwargs`` is accepted and ignored.

        Raises:
            RuntimeError: if the first three fields are not integers, or if
                ``Event.dataset`` has not been set.
        """
        text = line.strip()
        pieces = Event.splitRE.split (text)
        try:
            self['run'] = int( pieces[0] )
            self['lumi'] = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.
            raise RuntimeError("Can not parse '%s' as Event object" % text)
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % text)
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary items as attributes.

        Raises AttributeError (not KeyError) for unknown names so that
        ``hasattr`` and ``getattr(obj, name, default)`` behave normally.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
80 
81 
82 
85 
def getFileNames(event, client=None):
    """Return files for given DAS query.

    ``client`` selects the backend explicitly: ``'das_client'`` or
    ``'dasgoclient'``.  For any other value the ``dasgoclient`` backend is
    used when a file of that name is found in one of the ``PATH``
    directories, and ``das_client`` is used otherwise.
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    if client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # Default action: prefer dasgoclient when it is installed.
    # PATH may be unset (os.getenv would return None), so fall back to an
    # empty search list instead of crashing on None.split().
    searchPath = os.environ.get('PATH', '')
    directories = searchPath.split(os.pathsep) if searchPath else []
    for directory in directories:
        if os.path.isfile(os.path.join(directory, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
97 
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client.

    ``event`` must provide the ``dataset``, ``run`` and ``lumi`` keys used to
    build the query.  Returns a list of unique file names in the order they
    were reported; the list is empty when the DAS status is not ``'ok'``.
    """
    files = []

    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return files

    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']

    for row in data:
        # get_value is iterated for its results; only the first one is used.
        values = list(das_client.get_value(row, filters['grep']))
        if not values:
            # Nothing extracted from this row: skip it rather than fail on [0].
            continue
        fileName = values[0]
        if len(fileName) > 0 and fileName not in files:
            files.append(fileName)

    return files
120 
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient.

    Runs ``dasgoclient -query ... -json`` and collects the ``name`` of every
    record found under the ``file`` key of each returned row.

    Exits the program with status 1 when the client writes anything to
    stderr or when its output cannot be interpreted.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes concurrently and waits for the child;
    # reading stderr to EOF before stdout can block forever once the stdout
    # pipe buffer fills up.
    dasout, err = proc.communicate()
    if err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)
    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, TypeError, AttributeError):
        # Invalid JSON, or JSON that does not have the expected structure.
        print(dasout)
        sys.exit(1)
    return files
144 
def fullCPMpath():
    """Return the path of copyPickMerge_cfg.py.

    The file is looked up under ``$CMSSW_BASE`` first and then under
    ``$CMSSW_RELEASE_BASE``.

    Raises:
        RuntimeError: if ``CMSSW_BASE`` is not set, or if the file exists in
            neither location.
    """
    base = os.environ.get ('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    for root in (base, os.environ.get ('CMSSW_RELEASE_BASE')):
        if not root:
            # Unset variable: do not probe a bogus "None/src/..." path.
            continue
        candidate = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                    % root
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
159 
def guessEmail():
    """Build a default e-mail address of the form ``user@domain``.

    ``user`` is the output of ``whoami``; ``domain`` is made of the last two
    dot-separated components of the output of ``hostname``.
    """
    user = subprocess.getoutput ('whoami')
    host = subprocess.getoutput ('hostname')
    domain = '.'.join(host.split('.')[-2:])
    return '%s@%s' % (user, domain)
163 
def setupCrabDict (options):
    """Build the substitution dictionary used for the CRAB output files.

    Reads ``options.base``, ``options.email`` and ``options.crabCondor``.
    All file names are derived from ``options.base``; ``'WorkArea'`` is a
    timestamp taken at call time.

    Raises whatever ``fullCPMpath()`` raises when the copyPickMerge
    configuration cannot be located.
    """
    base = options.base
    crab = {
        'runEvent': '%s_runEvents.txt' % base,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % base,
        'crabcfg': '%s_crab.py' % base,
        'json': '%s.json' % base,
        'dataset': Event.dataset,
        'email': options.email,
        'WorkArea': datetime.now().strftime('%Y%m%d_%H%M%S'),
    }
    crab['scheduler'] = 'condor' if options.crabCondor else 'remoteGlidein'
    # Always present and empty, so that both scheduler choices return the
    # same set of keys.
    crab['useServer'] = ''
    return crab
184 
# CRAB 3 configuration template.  It is filled in with the %-operator from a
# dictionary that must provide the keys WorkArea, copyPickMerge, runEvent,
# output, dataset and json.  Everything inside the string, including the
# '##' lines, is written verbatim to the generated configuration file.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
229 
230 
235 
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line options
    # ------------------------------------------------------------------
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    das_cli = ''
    parser.add_option ('--das-client', dest='das_cli', type='string',
                       default=das_cli,
                       help="Specify das client to use (default '%s')" % das_cli )
    (options, args) = parser.parse_args()

    # A dataset and at least one event (or an event file) are required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # ------------------------------------------------------------------
    # Collect the requested events
    # ------------------------------------------------------------------
    Event.dataset = args.pop(0)
    # '.*' (not '.+') so that a '#' with nothing after it is stripped too.
    commentRE = re.compile (r'#.*$')
    colonRE = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # Blank or comment-only line: nothing to report.
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:
        # --------------------------------------------------------------
        # CRAB: write the lumi mask, the run:event list and the config
        # --------------------------------------------------------------
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Deliberately not called 'json': that would rebind the imported
        # json module at module level.
        lumiMask = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        crabDict = setupCrabDict (options)
        lumiMask.writeJSON (crabDict['json'])
        # 'with' guarantees both files are flushed and closed.
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:
        # --------------------------------------------------------------
        # Interactive: look up the files and build the command line
        # --------------------------------------------------------------
        files = []
        keptEvents = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # No files (or the legacy ['[]'] marker): the event is not
            # contained in the input dataset.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                continue
            keptEvents.append( event )
            files.extend( eventFiles )
        eventList = keptEvents
        # Purge duplicate files, keeping the order of first appearance
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        source = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)
363 
def get_value(data, filters, base=10)
Definition: das_client.py:248
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:276
def getFileNames_dasgoclient(event)
def getFileNames(event, client=None)
Subroutines ##.
Event helper class ##.
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def __getattr__(self, key)
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def getFileNames_das_client(event)
def __init__(self, line, kwargs)
def setupCrabDict(options)