CMS 3D CMS Logo

edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 from __future__ import print_function
10 import os
11 import sys
12 import optparse
13 import re
14 import commands
15 from FWCore.PythonUtilities.LumiList import LumiList
16 import json
17 from pprint import pprint
18 from datetime import datetime
19 import subprocess
20 import Utilities.General.cmssw_das_client as das_client
# Long-form usage text for this script.  The variable keeps its historical
# name, which shadows the `help` builtin at module scope.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just the name of the physics dataset; if you don't know the
         exact name you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
51 
52 
53 
56 
class Event (dict):
    """One (run, lumi, event) triple tied to the dataset being picked from.

    The object is a plain ``dict`` with the keys ``run``, ``lumi``, ``event``
    and ``dataset``; the same values can also be read as attributes
    (``evt.run``).  The dataset comes from the class attribute
    ``Event.dataset``, which must be set before any instance is created.
    """

    # Dataset name shared by every instance; assigned by the caller.
    dataset = None
    # Fields may be separated by any mix of whitespace, ':' and ','.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` ("run:lumi:event", "run lumi event", ...).

        Extra keyword arguments are accepted and ignored.

        Raises:
            RuntimeError: if ``line`` does not start with three integer
                fields, or if ``Event.dataset`` has not been set.
        """
        text = line.strip()
        pieces = Event.splitRE.split (text)
        try:
            self['run'] = int (pieces[0])
            self['lumi'] = int (pieces[1])
            self['event'] = int (pieces[2])
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.
            raise RuntimeError("Can not parse '%s' as Event object" % text)
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % text)
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary entries as attributes.

        Only called when normal attribute lookup fails.  A missing key is
        reported as AttributeError (not KeyError) so that ``hasattr`` and
        ``getattr(obj, name, default)`` behave as usual.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
80 
81 
82 
85 
def getFileNames(event, client=None):
    """Return the list of files DAS reports for ``event``.

    Args:
        event: mapping with ``dataset``, ``run`` and ``lumi`` entries; it is
            passed through unchanged to the selected backend.
        client: ``'das_client'`` or ``'dasgoclient'`` to force a backend.
            Any other value (including the default ``None``) selects
            dasgoclient when a file of that name is found in a PATH
            directory, and das_client otherwise.
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    if client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # Default action: prefer dasgoclient when it is installed.
    # An unset PATH is treated as empty instead of crashing on None.split().
    for path in os.environ.get('PATH', '').split(os.pathsep):
        if os.path.isfile(os.path.join(path, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
97 
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client

    ``event`` must provide ``dataset``, ``run`` and ``lumi`` entries.  An
    empty list is returned (after printing the status) when the DAS reply
    status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    # The reply echoes the parsed query; its 'grep' filter is what
    # das_client.get_value uses to pull the requested field out of each row.
    filters = jsondict['mongo_query']['filters']

    files = []
    for row in jsondict['data']:
        values = list(das_client.get_value(row, filters['grep']))
        if not values:
            # Row without a matching value: skip it instead of raising IndexError.
            continue
        fname = values[0]
        # Keep first-seen order and drop empty names and duplicates.
        if len(fname) > 0 and fname not in files:
            files.append(fname)

    return files
120 
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient

    ``event`` must provide ``dataset``, ``run`` and ``lumi`` entries.  If the
    dasgoclient process writes anything to stderr, that text is printed and
    an empty list is returned.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes together and waits for the child.
    # Reading stderr to EOF first can deadlock once the child has filled
    # the stdout pipe buffer.
    out, err = proc.communicate()
    files = []
    if err:
        print("DAS error: %s" % err)
        return files
    # Each record may carry a 'file' list whose entries have a 'name'.
    for row in json.loads(out.decode('utf-8')):
        for rec in row.get('file', []):
            fname = rec.get('name', '')
            if fname:
                files.append(fname)
    return files
137 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE.

    Raises:
        RuntimeError: if CMSSW_BASE is not set, or if the file exists under
            neither base directory.
    """
    if not os.environ.get ('CMSSW_BASE'):
        raise RuntimeError("CMSSW Environment not set")
    relPath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get (variable)
        if not base:
            # Unset variable: skip it rather than probing "None/src/...".
            continue
        retval = "%s/%s" % (base, relPath)
        if os.path.exists (retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
152 
def guessEmail():
    """Guess an e-mail address of the form ``user@domain`` for the caller.

    The user part is the login name reported by ``getpass.getuser()`` (empty
    if it cannot be determined); the domain part is the last two
    dot-separated components of the local host name.
    """
    # Local imports: this avoids the Python-2-only `commands` module and the
    # two shell-outs (`whoami`, `hostname`) it was used for.
    import getpass
    import socket
    try:
        user = getpass.getuser()
    except (ImportError, KeyError, OSError):
        user = ''
    domain = '.'.join(socket.gethostname().split('.')[-2:])
    return '%s@%s' % (user, domain)
156 
def setupCrabDict (options):
    """Build the substitution dictionary used to fill in the CRAB template.

    Args:
        options: parsed command-line options; the attributes ``base``,
            ``email`` and ``crabCondor`` are read.

    Returns:
        dict with the keys ``runEvent``, ``copyPickMerge``, ``output``,
        ``crabcfg``, ``json``, ``dataset``, ``email``, ``WorkArea``,
        ``scheduler`` and ``useServer``.

    Any exception raised by fullCPMpath() propagates to the caller.
    """
    base = options.base
    crab = {
        'runEvent': '%s_runEvents.txt' % base,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % base,
        'crabcfg': '%s_crab.py' % base,
        'json': '%s.json' % base,
        'dataset': Event.dataset,
        'email': options.email,
        # Timestamp keeps the work areas of successive invocations apart.
        'WorkArea': datetime.now().strftime('%Y%m%d_%H%M%S'),
        'scheduler': 'condor' if options.crabCondor else 'remoteGlidein',
        # Always present (and empty) so a template may reference it
        # whichever scheduler was chosen.
        'useServer': '',
    }
    return crab
177 
178 # crab template
179 crabTemplate = '''
180 ## Edited By Raman Khurana
181 ##
182 ## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
183 ##
184 ## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
185 ##
186 ## Once you are happy with this file, please run
187 ## crab submit
188 
189 ## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:
190 
191 from WMCore.Configuration import Configuration
192 config = Configuration()
193 
194 ## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
195 config.section_("General")
196 config.General.requestName = 'pickEvents'
197 config.General.workArea = 'crab_pickevents_%(WorkArea)s'
198 
199 
200 config.section_("JobType")
201 config.JobType.pluginName = 'Analysis'
202 config.JobType.psetName = '%(copyPickMerge)s'
203 config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']
204 
205 config.section_("Data")
206 config.Data.inputDataset = '%(dataset)s'
207 
208 config.Data.inputDBS = 'global'
209 config.Data.splitting = 'LumiBased'
210 config.Data.unitsPerJob = 5
211 config.Data.lumiMask = '%(json)s'
212 #config.Data.publication = True
213 #config.Data.publishDbsUrl = 'phys03'
214 #config.Data.publishDataName = 'CRAB3_CSA_DYJets'
215 #config.JobType.allowNonProductionCMSSW=True
216 
217 config.section_("Site")
218 ## Change site name accordingly
219 config.Site.storageSite = "T2_US_Wisconsin"
220 
221 '''
222 
223 
228 
if __name__ == "__main__":
    # Command-line driver.  Parses the dataset and the event list, then either
    # writes the files needed for a CRAB submission or prints (and optionally
    # runs) an edmCopyPickMerge command for interactive use.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    das_cli = ''
    parser.add_option ('--das-client', dest='das_cli', type='string',
                       default=das_cli,
                       help="Specify das client to use (default '%s')" % das_cli )
    (options, args) = parser.parse_args()

    # A dataset plus at least one event (or one event file) is required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    commentRE = re.compile (r'#.+$')
    colonRE = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file; the with-block closes it even on errors
        with open(args[0], 'r') as eventFile:
            for line in eventFile:
                line = commentRE.sub ('', line)
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named lumiList (not `json`) so the imported json module, which
        # getFileNames_dasgoclient relies on, is not rebound.
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        # The original wrote `target.close` without parentheses here, so the
        # configuration file was never explicitly closed.
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        keptEvents = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # An empty result, or the legacy ['[]'] marker, means the event
            # is not contained in the input dataset.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
            else:
                keptEvents.append (event)
                files.extend( eventFiles )
        # Purge events that were not found
        eventList = keptEvents
        # Purge duplicate files, keeping first-seen order
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename not in fileSet:
                fileSet.add (filename)
                uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)
356 
edmPickEvents.Event.__str__
def __str__(self)
Definition: edmPickEvents.py:78
edmPickEvents.getFileNames_dasgoclient
def getFileNames_dasgoclient(event)
Definition: edmPickEvents.py:121
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
cms::dd::split
std::vector< std::string_view > split(std::string_view, const char *)
edmPickEvents.Event.__init__
def __init__(self, line, **kwargs)
Definition: edmPickEvents.py:61
edmPickEvents.getFileNames
def getFileNames(event, client=None)
Subroutines ##.
Definition: edmPickEvents.py:86
edmPickEvents.guessEmail
def guessEmail()
Definition: edmPickEvents.py:153
edmPickEvents.fullCPMpath
def fullCPMpath()
Definition: edmPickEvents.py:138
das_client.get_value
def get_value(data, filters, base=10)
Definition: das_client.py:248
createfilelist.int
int
Definition: createfilelist.py:10
edmPickEvents.Event
Event helper class ##.
Definition: edmPickEvents.py:57
edm::print
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
das_client.get_data
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:275
edmPickEvents.setupCrabDict
def setupCrabDict(options)
Definition: edmPickEvents.py:157
edmPickEvents.Event.__getattr__
def __getattr__(self, key)
Definition: edmPickEvents.py:75
edmPickEvents.getFileNames_das_client
def getFileNames_das_client(event)
Definition: edmPickEvents.py:98