CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 import os
10 import sys
11 import optparse
12 import re
13 import commands
14 from FWCore.PythonUtilities.LumiList import LumiList
15 import das_client
16 import json
17 from pprint import pprint
18 from datetime import datetime
19 
20 
# Usage text describing the accepted command-line forms and the layout of the
# event-list file.
# NOTE(review): this name shadows the builtin help(), and nothing in the
# visible part of the file reads it -- confirm before renaming or removing.
help = """
How to use:

edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvent.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it just a name of the physics dataset, if you don't know exact name
 you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
51 
52 
53 ########################
54 ## Event helper class ##
55 ########################
56 
class Event (dict):
    """Dictionary describing a single event to pick.

    Holds the integer keys 'run', 'lumi' and 'event', plus 'dataset', which
    is copied from the class attribute ``Event.dataset`` when the instance
    is created.  Keys can also be read as attributes (``event.run``).
    """

    # Dataset name shared by all events; set it before creating instances.
    dataset = None
    # Fields may be separated by whitespace, colons or commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse 'run lumi event' out of line.

        line   -- text holding three integers separated by whitespace,
                  ':' or ','
        kwargs -- accepted for backward compatibility; not used

        Raises RuntimeError if line does not start with three integers or
        if Event.dataset has not been set.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run']   = int( pieces[0] )
            self['lumi']  = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (ValueError, IndexError):
            # ValueError: a field is not an integer; IndexError: too few fields
            raise RuntimeError("Can not parse '%s' as Event object" \
                               % line.strip())
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print ("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        Raises AttributeError (not KeyError) for unknown names so that
        hasattr() and getattr() with a default behave as expected.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line, human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
80 
81 
82 #################
83 ## Subroutines ##
84 #################
85 
def getFileNames (event):
    """Return the file names DAS reports for the event's dataset, run and lumi.

    event -- mapping providing 'dataset', 'run' and 'lumi' (e.g. an Event)

    Returns a list of unique, non-empty file names in the order DAS
    returned them.  If the DAS status is not 'ok', the status is printed
    and an empty list is returned.
    """
    files = []
    # Query DAS
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    # NOTE(review): the positional 0, 0, False are presumably index, limit
    # and verbosity -- confirm against das_client.get_data.
    jsondict = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print ("DAS query status: %s"%(status))
        return files

    grepFilter = jsondict['mongo_query']['filters']['grep']
    for row in jsondict['data']:
        values = list (das_client.get_value(row, grepFilter))
        if not values:
            # nothing extracted from this row; skip it instead of failing
            # with an IndexError
            continue
        fileName = values[0]
        if len(fileName) > 0 and fileName not in files:
            files.append(fileName)

    return files
107 
108 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked for under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE.

    Raises RuntimeError if CMSSW_BASE is not set or if the file exists in
    neither area.
    """
    relPath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    base = os.environ.get ('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    for area in (base, os.environ.get ('CMSSW_RELEASE_BASE')):
        if not area:
            # CMSSW_RELEASE_BASE may be unset; do not probe "None/src/..."
            continue
        retval = "%s/%s" % (area, relPath)
        if os.path.exists (retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
123 
def guessEmail():
    """Guess the user's e-mail address as '<user>@<domain>'.

    The user part is the output of the ``whoami`` command; the domain part
    is the last two dot-separated components of the output of ``hostname``.
    """
    return '%s@%s' % (commands.getoutput ('whoami'),
                      '.'.join(commands.getoutput('hostname').split('.')[-2:]))
127 
def setupCrabDict (options):
    """Build the dictionary used to fill in the CRAB configuration template.

    options -- parsed command-line options; reads 'base', 'email' and
               'crabCondor'

    Returns a dict with the output file names derived from options.base,
    the path of copyPickMerge_cfg.py, the dataset, the e-mail address, a
    time-stamped work area name and the scheduler choice.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    return {
        'runEvent'      : '%s_runEvents.txt' % prefix,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % prefix,
        'crabcfg'       : '%s_crab.py' % prefix,
        'json'          : '%s.json' % prefix,
        'dataset'       : Event.dataset,
        'email'         : options.email,
        'WorkArea'      : timestamp,
        # Condor scheduler only on request; remote glide-in otherwise
        'scheduler'     : 'condor' if options.crabCondor else 'remoteGlidein',
        'useServer'     : '',
    }
148 
# CRAB 3 configuration template.  The main program fills it in with
# "crabTemplate % crabDict", so the %(WorkArea)s, %(copyPickMerge)s,
# %(runEvent)s, %(output)s, %(dataset)s and %(json)s placeholders must match
# keys of the dictionary returned by setupCrabDict().
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
193 
194 ########################
195 ## ################## ##
196 ## ## Main Program ## ##
197 ## ################## ##
198 ########################
199 
if __name__ == "__main__":
    # Best-guess address; used when --email is not given.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()


    # Need at least the dataset and one event (or an event-list file)
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # The first positional argument is the dataset; the rest describe events
    Event.dataset = args.pop(0)
    # '#.*$' (not '#.+$') so that a lone '#' is also treated as a comment
    commentRE = re.compile (r'#.*$')
    colonRE = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # blank or comment-only line: nothing to parse or report
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print ("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print ("No events defined. Aborting.")
        sys.exit()

    # Too many events to process interactively: fall back to CRAB
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named lumiMask so that the imported json module is not rebound
        lumiMask = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiMask.writeJSON (crabDict['json'])
        # 'with' guarantees both files are flushed and closed
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print ("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print ("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print ("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print ("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames (event)
            if eventFiles == ['[]']: # event not contained in the input dataset
                print ("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Purge events
        for event in eventPurgeList:
            eventList.remove( event )
        # Purge duplicate files, keeping the first occurrence of each
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print ("\n%s" % command)
        # --printInteractive wins: only show the command, never run it
        if options.runInteractive and not options.printInteractive:
            os.system (command)
323 
Event helper class ##.
def getFileNames
Subroutines ##.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139