CMS 3D CMS Logo

edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 import os
10 import sys
11 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
12 import re
13 
14 from FWCore.PythonUtilities.LumiList import LumiList
15 import json
16 from pprint import pprint
17 from datetime import datetime
18 import subprocess
19 import Utilities.General.cmssw_das_client as das_client
20 help = """
21 How to use:
22 
23 edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2
24 
25 - or -
26 
27 edmPickEvent.py dataset listOfEvents.txt
28 
29 listOfEvents is a text file:
30 # this line is ignored as a comment
31 # since '#' is a valid comment character
32 run1 lumi_section1 event1
33 run2 lumi_section2 event2
34 
35 For example:
36 # run lum event
37 46968 2 4
38 47011 105 23
39 47011 140 12312
40 
41 run, lumi_section, and event are integers that you can get from
42 edm::Event(Auxiliary)
43 
44 dataset: it just a name of the physics dataset, if you don't know exact name
45  you can provide a mask, e.g.: *QCD*RAW
46 
47 For updated information see Wiki:
48 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
49 """
50 
51 
52 
55 
class Event(dict):
    """One (run, lumi, event) triple tied to the dataset being picked from.

    The instance is a dict with the keys 'run', 'lumi', 'event' and
    'dataset'; the same values are also readable as attributes.
    The dataset is taken from the class attribute ``Event.dataset``, which
    must be set before any instance is created.
    """

    # Dataset shared by every Event; assigned by the caller before parsing.
    dataset = None
    # Fields may be separated by whitespace, colons or commas.
    splitRE = re.compile(r'[\s:,]+')

    def __init__(self, line, **kwargs):
        """Parse ``line`` ("run:lumi:event" or "run lumi event").

        Extra keyword arguments are accepted and ignored.

        Raises:
            RuntimeError: if the line does not start with three integers,
                or if no dataset has been defined.
        """
        super().__init__()
        pieces = Event.splitRE.split(line.strip())
        try:
            self['run'] = int(pieces[0])
            self['lumi'] = int(pieces[1])
            self['event'] = int(pieces[2])
        except (IndexError, ValueError) as err:
            # Only parsing problems are translated; anything else
            # (e.g. KeyboardInterrupt) propagates untouched.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip()) from err
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__(self, key):
        """Expose dict entries as attributes (event.run, event.lumi, ...)."""
        try:
            return self[key]
        except KeyError:
            # Attribute protocol requires AttributeError; a KeyError here
            # breaks hasattr(), copy.copy() and pickling.
            raise AttributeError(key) from None

    def __str__(self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
79 
80 
81 
84 
def getFileNames(event, client=None):
    """Return files for given DAS query.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.
        client: 'das_client' or 'dasgoclient' to force a backend; any other
            value selects dasgoclient when an entry of that name is found
            in a PATH directory, and das_client otherwise.
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    if client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # default action: prefer dasgoclient when it is installed.
    # PATH may be unset (os.environ.get returns None), so guard the split.
    search_path = os.environ.get('PATH')
    directories = search_path.split(os.pathsep) if search_path else []
    for directory in directories:
        if os.path.isfile(os.path.join(directory, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
96 
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.

    Returns:
        List of distinct file names in the order DAS reported them; empty
        when the query status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    grep_filter = jsondict['mongo_query']['filters']['grep']

    files = []
    for row in jsondict['data']:
        # Take the first value produced for this row; a row that produces
        # none is skipped instead of raising IndexError.
        fname = next(iter(das_client.get_value(row, grep_filter)), None)
        if fname and fname not in files:
            files.append(fname)

    return files
119 
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient.

    Runs ``dasgoclient -query ... -json`` and collects every non-empty
    'name' found in the 'file' records of the JSON output.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.

    Returns:
        List of file names (duplicates are not removed here).

    Exits the process with status 1 when dasgoclient writes anything to
    stderr or when its output cannot be interpreted.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains both pipes concurrently; reading stderr to EOF
    # before touching stdout can deadlock once the stdout pipe fills up.
    dasout, err = proc.communicate()
    if err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)

    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, AttributeError, TypeError):
        # Malformed JSON or an unexpected record layout: show the raw output.
        print(dasout)
        sys.exit(1)
    return files
143 
def fullCPMpath():
    """Return the path of copyPickMerge_cfg.py in the CMSSW area.

    The local area ($CMSSW_BASE) is searched first, then the release area
    ($CMSSW_RELEASE_BASE).

    Raises:
        RuntimeError: if CMSSW_BASE is not set, or if the configuration
            file exists in neither area.
    """
    if not os.environ.get('CMSSW_BASE'):
        raise RuntimeError("CMSSW Environment not set")
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get(variable)
        if not base:
            # Unset variable: nothing to probe (avoids testing 'None/src/...').
            continue
        candidate = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                    % base
        if os.path.exists(candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
158 
def guessEmail():
    """Build a default e-mail address as <whoami>@<last two hostname labels>."""
    user = subprocess.getoutput('whoami')
    host_labels = subprocess.getoutput('hostname').split('.')
    domain = '.'.join(host_labels[-2:])
    return '%s@%s' % (user, domain)
162 
def setupCrabDict(options):
    """Collect the values substituted into the CRAB configuration template.

    Args:
        options: parsed command-line options; reads ``base``, ``email`` and
            ``crabCondor``.

    Returns:
        dict with the output file names derived from ``options.base``, the
        copyPickMerge configuration path, the dataset, the e-mail address,
        a timestamp used as work-area suffix, and the scheduler name.

    Raises:
        RuntimeError: propagated from fullCPMpath() when the CMSSW
            configuration file cannot be located.
    """
    base = options.base
    return {
        'runEvent': '%s_runEvents.txt' % base,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % base,
        'crabcfg': '%s_crab.py' % base,
        'json': '%s.json' % base,
        'dataset': Event.dataset,
        'email': options.email,
        'WorkArea': datetime.now().strftime('%Y%m%d_%H%M%S'),
        'scheduler': 'condor' if options.crabCondor else 'remoteGlidein',
        # Always present so the dict has the same keys for both schedulers.
        'useServer': '',
    }
183 
184 # crab template
185 crabTemplate = '''
186 ## Edited By Raman Khurana
187 ##
188 ## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
189 ##
190 ## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
191 ##
192 ## Once you are happy with this file, please run
193 ## crab submit
194 
195 ## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:
196 
197 from WMCore.Configuration import Configuration
198 config = Configuration()
199 
200 ## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
201 config.section_("General")
202 config.General.requestName = 'pickEvents'
203 config.General.workArea = 'crab_pickevents_%(WorkArea)s'
204 
205 
206 config.section_("JobType")
207 config.JobType.pluginName = 'Analysis'
208 config.JobType.psetName = '%(copyPickMerge)s'
209 config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']
210 
211 config.section_("Data")
212 config.Data.inputDataset = '%(dataset)s'
213 
214 config.Data.inputDBS = 'global'
215 config.Data.splitting = 'LumiBased'
216 config.Data.unitsPerJob = 5
217 config.Data.lumiMask = '%(json)s'
218 #config.Data.publication = True
219 #config.Data.publishDbsUrl = 'phys03'
220 #config.Data.publishDataName = 'CRAB3_CSA_DYJets'
221 #config.JobType.allowNonProductionCMSSW=True
222 
223 config.section_("Site")
224 ## Change site name accordingly
225 config.Site.storageSite = "T2_US_Wisconsin"
226 
227 '''
228 
229 
234 
if __name__ == "__main__":
    # ---- Command-line options ----
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents''')
    parser.add_argument('--output', dest='base', type=str,
                        default='pickevents',
                        help='Base name to use for output files (root, JSON, run and event list, etc.)')
    parser.add_argument('--runInteractive', dest='runInteractive', action='store_true',
                        help='Call "cmsRun" command if possible. Can take a long time.')
    parser.add_argument('--printInteractive', dest='printInteractive', action='store_true',
                        help='Print "cmsRun" command instead of running it.')
    parser.add_argument('--maxEventsInteractive', dest='maxEventsInteractive', type=int,
                        default=20,
                        help='Maximum number of events allowed to be processed interactively.')
    parser.add_argument('--crab', dest='crab', action='store_true',
                        help='Force CRAB setup instead of interactive mode')
    parser.add_argument('--crabCondor', dest='crabCondor', action='store_true',
                        help='Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_argument('--email', dest='email', type=str,
                        default=guessEmail(),
                        help="Specify email for CRAB")
    parser.add_argument('--das-client', dest='das_cli', type=str,
                        default='',
                        help="Specify das client to use")
    parser.add_argument("dataset", type=str)
    parser.add_argument("events", metavar="events_or_events.txt", type=str, nargs='+')
    options = parser.parse_args()

    # ---- Build the list of requested events ----
    Event.dataset = options.dataset
    commentRE = re.compile(r'#.+$')
    colonRE = re.compile(r':')
    eventList = []
    if len(options.events) > 1 or colonRE.search(options.events[0]):
        # events are coming in from the command line
        for piece in options.events:
            try:
                event = Event(piece)
            except RuntimeError as err:
                raise RuntimeError("'%s' is not a proper event" % piece) from err
            eventList.append(event)
    else:
        # read events from file; unparsable lines are reported and skipped
        with open(options.events[0], 'r') as source:
            for line in source:
                line = commentRE.sub('', line)
                try:
                    event = Event(line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: fall back to CRAB.
    if len(eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:
        # ---- CRAB mode: write lumi mask, event list and CRAB config ----
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [(event.run, event.lumi) for event in eventList]
        # Named lumiList (not 'json') so the json module stays usable.
        lumiList = LumiList(lumis=runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted("%d:%d" % (event.run, event.event) for event in eventList))
        crabDict = setupCrabDict(options)
        lumiList.writeJSON(crabDict['json'])
        with open(crabDict['runEvent'], 'w') as target:
            target.write("%s\n" % eventsToProcess)
        with open(crabDict['crabcfg'], 'w') as target:
            target.write(crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:
        # ---- Interactive mode: look up the files and build the command ----
        files = []
        keptEvents = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # No files (or the legacy ['[]'] marker) means the event is not
            # contained in the input dataset.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                continue
            keptEvents.append(event)
            files.extend(eventFiles)
        eventList = keptEvents
        if not eventList:
            print("No events defined. Aborting.")
            sys.exit(1)
        # Purge duplicate files, keeping first-seen order
        uniqueFiles = list(dict.fromkeys(files))
        inputFiles = ','.join(uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted("%d:%d" % (event.run, event.event) for event in eventList))
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system(command)
356 
def get_value(data, filters, base=10)
Definition: das_client.py:248
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
Definition: das_client.py:276
def getFileNames_dasgoclient(event)
def getFileNames(event, client=None)
Subroutines ##.
Event helper class ##.
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def __getattr__(self, key)
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def getFileNames_das_client(event)
def __init__(self, line, kwargs)
def setupCrabDict(options)