CMS 3D CMS Logo

edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 import os
10 import sys
11 import optparse
12 import re
13 import commands
14 from FWCore.PythonUtilities.LumiList import LumiList
15 import json
16 from pprint import pprint
17 from datetime import datetime
18 import Utilities.General.cmssw_das_client as das_client
# Usage text for this script.
# NOTE: the name shadows the builtin `help`; it is kept unchanged because it is
# part of the module's existing interface.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it just a name of the physics dataset, if you don't know exact name
    you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
49 
50 
51 ########################
52 ## Event helper class ##
53 ########################
54 
class Event(dict):
    """A (run, lumi, event, dataset) record parsed from one line of text.

    The line is split on any run of whitespace, colons or commas; the first
    three pieces must be integers (run, lumi section, event number).  The
    dataset is not parsed from the line: it is copied from the class
    attribute ``Event.dataset``, which must be set before instances are
    created.

    Items are also readable as attributes (``ev.run`` is ``ev['run']``).

    Raises:
        RuntimeError: if the line cannot be parsed, or if no dataset has
            been set on the class.
    """

    # Dataset name shared by all events; assigned by the main program.
    dataset = None
    # Separator between the run / lumi / event fields.
    splitRE = re.compile(r'[\s:,]+')

    def __init__(self, line, **kwargs):
        # **kwargs is accepted for interface compatibility and is unused.
        pieces = Event.splitRE.split(line.strip())
        try:
            self['run'] = int(pieces[0])
            self['lumi'] = int(pieces[1])
            self['event'] = int(pieces[2])
            self['dataset'] = Event.dataset
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip())
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails.  Unknown names must
        # surface as AttributeError (not KeyError) so that getattr() with a
        # default, hasattr() and the copy module keep working.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__(self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
78 
79 
80 #################
81 ## Subroutines ##
82 #################
83 
def getFileNames(event):
    """Query DAS for the files of ``event``'s dataset, run and lumi section.

    Args:
        event: mapping providing 'dataset', 'run' and 'lumi' (e.g. an Event).

    Returns:
        List of distinct, non-empty file names in the order DAS returned
        them.  An empty list is returned (after printing the status) when the
        DAS query status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s" % (status))
        return []

    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']

    files = []
    for row in data:
        # Only the first value produced for each row is used.
        fileName = [r for r in das_client.get_value(row, filters['grep'])][0]
        if len(fileName) > 0 and fileName not in files:
            files.append(fileName)

    return files
105 
106 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up first under $CMSSW_BASE and then under
    $CMSSW_RELEASE_BASE.

    Raises:
        RuntimeError: if CMSSW_BASE is not set, or if the file exists in
            neither location.
    """
    base = os.environ.get('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    for root in (base, os.environ.get('CMSSW_RELEASE_BASE')):
        if not root:
            # CMSSW_RELEASE_BASE is unset: do not probe a bogus "None/..." path.
            continue
        retval = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                 % root
        if os.path.exists(retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
121 
def guessEmail():
    """Guess an e-mail address of the form ``<user>@<domain>``.

    ``<user>`` is the output of the ``whoami`` command and ``<domain>`` is
    made of the last two dot-separated components of the ``hostname``
    command output.
    """
    user = commands.getoutput('whoami')
    domain = '.'.join(commands.getoutput('hostname').split('.')[-2:])
    return '%s@%s' % (user, domain)
125 
def setupCrabDict(options):
    """Build the dictionary of values used to fill in the CRAB template.

    Args:
        options: parsed command-line options; ``base``, ``email`` and
            ``crabCondor`` are read.

    Returns:
        dict with the output file names derived from ``options.base``, the
        path of copyPickMerge_cfg.py, the dataset taken from
        ``Event.dataset``, the e-mail, a timestamp used as work area name,
        the scheduler name and an empty 'useServer' entry.
    """
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    base = options.base
    crab = {}
    crab['runEvent'] = '%s_runEvents.txt' % base
    crab['copyPickMerge'] = fullCPMpath()
    crab['output'] = '%s.root' % base
    crab['crabcfg'] = '%s_crab.py' % base
    crab['json'] = '%s.json' % base
    crab['dataset'] = Event.dataset
    crab['email'] = options.email
    crab['WorkArea'] = date
    if options.crabCondor:
        crab['scheduler'] = 'condor'
    else:
        crab['scheduler'] = 'remoteGlidein'
    # Always empty, whichever scheduler was selected.
    crab['useServer'] = ''
    return crab
146 
# CRAB 3 configuration template.  It is filled with the %-operator from a
# dictionary that must provide the keys: WorkArea, copyPickMerge, runEvent,
# output, dataset and json.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
191 
192 ########################
193 ## ################## ##
194 ## ## Main Program ## ##
195 ## ################## ##
196 ########################
197 
if __name__ == "__main__":
    # Entry point: parse the command line, build the list of requested events
    # (from the arguments or from a text file), then either write the files
    # needed for a CRAB submission or print / run an interactive
    # edmCopyPickMerge command.
    email = guessEmail()
    parser = optparse.OptionParser("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option('--output', dest='base', type='string',
                      default='pickevents',
                      help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option('--runInteractive', dest='runInteractive', action='store_true',
                      help='Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option('--printInteractive', dest='printInteractive', action='store_true',
                      help='Print "cmsRun" command instead of running it.')
    parser.add_option('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                      default=20,
                      help='Maximum number of events allowed to be processed interactively.')
    parser.add_option('--crab', dest='crab', action='store_true',
                      help='Force CRAB setup instead of interactive mode')
    parser.add_option('--crabCondor', dest='crabCondor', action='store_true',
                      help='Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option('--email', dest='email', type='string',
                      default='',
                      help="Specify email for CRAB (default '%s')" % email)
    (options, args) = parser.parse_args()

    # A dataset plus at least one event (or an event file) is required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # The first positional argument is the dataset shared by all events.
    Event.dataset = args.pop(0)
    commentRE = re.compile(r'#.+$')
    colonRE = re.compile(r':')
    eventList = []
    if len(args) > 1 or colonRE.search(args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event(piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append(event)
    else:
        # read events from file; the file is closed even if an error occurs
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub('', line)
                try:
                    event = Event(line)
                except RuntimeError:
                    # Event signals unparsable lines with RuntimeError.
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: force the CRAB setup.
    if len(eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [(event.run, event.lumi) for event in eventList]
        # Named lumiMask (not 'json') so the imported json module is not shadowed.
        lumiMask = LumiList(lumis=runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted(["%d:%d" % (event.run, event.event) for event in eventList]))
        crabDict = setupCrabDict(options)
        lumiMask.writeJSON(crabDict['json'])
        with open(crabDict['runEvent'], 'w') as target:
            target.write("%s\n" % eventsToProcess)
        # 'with' guarantees the CRAB configuration is flushed and closed.
        with open(crabDict['crabcfg'], 'w') as target:
            target.write(crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event)
            if eventFiles == ['[]']:  # event not contained in the input dataset
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping." % (event.run, event.lumi, event.event, event.dataset))
                eventPurgeList.append(event)
            else:
                files.extend(eventFiles)
        # Purge events (done after the loop so eventList is not modified
        # while it is being iterated)
        for event in eventPurgeList:
            eventList.remove(event)
        # Purge duplicate files, keeping the first occurrence of each
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add(filename)
            uniqueFiles.append(filename)
        source = ','.join(uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted(["%d:%d" % (event.run, event.event) for event in eventList]))
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system(command)
321 
Event helper class ##.
def getFileNames(event)
Subroutines ##.
def __getattr__(self, key)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __init__(self, line, kwargs)
def setupCrabDict(options)
double split
Definition: MVATrainer.cc:139