test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 # Raman Khurana June 18, 2015
8 # Dinko Ferencek June 27, 2015
9 import os
10 import sys
11 import optparse
12 import re
13 import commands
14 from FWCore.PythonUtilities.LumiList import LumiList
15 import json
16 from pprint import pprint
17 from datetime import datetime
18 from subprocess import Popen,PIPE
19 from types import GeneratorType
# Long-form usage text for this script.  The script is installed as
# edmPickEvents.py, so the examples use that name.
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just a name of the physics dataset, if you don't know exact name
    you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
50 
51 #helper function taken from das_client
def convert_time(val):
    """Convert a numeric timestamp into a human readable GMT string.

    Integers and floats are interpreted as seconds since the epoch and
    formatted as ``DD/Mon/YYYY_HH:MM:SS_GMT``.  Any other value is returned
    unchanged.
    """
    # Imported locally: the module header does not import ``time``, so the
    # original code raised NameError for every numeric input.
    import time
    if isinstance(val, (int, float)):
        return time.strftime('%d/%b/%Y_%H:%M:%S_GMT', time.gmtime(val))
    return val
57 
def size_format(uinput, ibase=0):
    """
    Format file size utility, it converts file size into KB, MB, GB, TB, PB units

    uinput -- the size to format; anything float() cannot convert is
              returned unchanged.
    ibase  -- 0 (or any false value) returns uinput untouched; 2 selects
              binary units (KiB, MiB, ...); any other value selects
              decimal units (KB, MB, ...).

    Returns a string such as "1.5KB".  Values too large for the biggest
    unit are expressed in that unit (e.g. "1000.0PB") instead of falling
    off the end of the loop and returning None.
    """
    if not ibase:
        return uinput
    try:
        num = float(uinput)
    except Exception:
        # Best effort: hand back whatever could not be converted.
        return uinput
    if ibase == 2.:  # power of 2
        base = 1024.
        units = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
    else:  # default base is 10
        base = 1000.
        units = ['', 'KB', 'MB', 'GB', 'TB', 'PB']
    # Scale down through every unit except the largest one ...
    for unit in units[:-1]:
        if num < base:
            return "%3.1f%s" % (num, unit)
        num /= base
    # ... which absorbs whatever is left, however large.
    return "%3.1f%s" % (num, units[-1])
78 
def extract_value(row, key, base=10):
    """Generator which extracts row[key] value

    If ``row`` is a dict containing ``key``, the value is yielded (passed
    through convert_time for 'creation_time' and through size_format with
    ``base`` for 'size').  If the resulting value -- or ``row`` itself --
    is a list or generator, every item is searched recursively for the
    same key.
    """
    if isinstance(row, dict) and key in row:
        if key == 'creation_time':
            row = convert_time(row[key])
        elif key == 'size':
            row = size_format(row[key], base)
        else:
            row = row[key]
        yield row
    # ``row`` may have been rebound to the extracted value above, so a
    # list stored under ``key`` is yielded as a whole and then descended.
    if isinstance(row, (list, GeneratorType)):
        for item in row:
            for value in extract_value(item, key, base):
                yield value
93 
94 
def get_value(data, filters, base=10):
    """Filter data from a row for given list of filters

    data    -- a mapping (copied with dict()) describing one result row.
    filters -- iterable of dotted key paths such as 'file.name'; entries
               containing '<', '>' or '=' are comparisons and are skipped.
    base    -- forwarded to extract_value for 'size' formatting.

    Yields one item per non-comparison filter: the JSON-encoded value when
    exactly one value was found, otherwise the list of JSON-encoded values
    (possibly empty).
    """
    for ftr in filters:
        if '>' in ftr or '<' in ftr or '=' in ftr:
            continue
        row = dict(data)
        values = []
        keys = ftr.split('.')
        last = len(keys) - 1
        for idx, key in enumerate(keys):
            val = list(extract_value(row, key, base))
            # Detect the final path component by position, not by name, so
            # a path that repeats a key (e.g. 'a.a') still descends first.
            if idx == last:  # we collect all values at last key
                values += [json.dumps(i) for i in val]
            else:
                row = val
        if len(values) == 1:
            yield values[0]
        else:
            yield values
113 
114 
115 
116 ########################
117 ## Event helper class ##
118 ########################
119 
class Event(dict):
    """A run/lumi/event triple parsed from text, stored as dict entries.

    The keys 'run', 'lumi', 'event' (ints) and 'dataset' are set by the
    constructor; keys are also readable as attributes.
    """

    # Dataset shared by all events; must be assigned before constructing
    # instances.  Because this is a class attribute, ``evt.dataset``
    # resolves here through normal attribute lookup rather than through
    # __getattr__ / the 'dataset' key.
    dataset = None
    # Fields may be separated by any mix of whitespace, colons and commas.
    splitRE = re.compile(r'[\s:,]+')

    def __init__(self, line, **kwargs):
        """Parse ``line`` ("run:lumi:event", "run lumi event", ...).

        Extra keyword arguments are accepted and ignored.
        Raises RuntimeError if the first three fields are not integers or
        if no dataset has been set on the class.
        """
        pieces = Event.splitRE.split(line.strip())
        try:
            self['run'] = int(pieces[0])
            self['lumi'] = int(pieces[1])
            self['event'] = int(pieces[2])
            self['dataset'] = Event.dataset
        except (IndexError, ValueError):
            # Too few fields, or a field that is not an integer.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip())
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__(self, key):
        """Expose dict keys as attributes (only called when normal lookup fails)."""
        try:
            return self[key]
        except KeyError:
            # Attribute protocol: hasattr()/getattr(default) and the copy
            # machinery expect AttributeError, not KeyError.
            raise AttributeError(key)

    def __str__(self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
143 
144 
145 #################
146 ## Subroutines ##
147 #################
148 
def getFileNames(event):
    """Ask DAS which files of the event's dataset contain its run and lumi.

    event -- mapping with 'dataset', 'run' and 'lumi' keys (e.g. an Event).

    Runs the external ``das_client`` command and parses its JSON output.
    Returns the list of distinct file-name values in the order DAS
    reported them.  The values come from get_value, which JSON-encodes
    them, so they are JSON text rather than bare names.  Returns an empty
    list (after printing the status) when the DAS status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    # Pass an argument list and no shell: the dataset comes from the command
    # line and must not be interpreted by a shell.
    proc = Popen(['das_client', '--format', 'json', '--query', query],
                 stdout=PIPE)
    # communicate() drains stdout and reaps the child in one step.
    output = proc.communicate()[0]

    jsondict = json.loads(output)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s" % (status))
        return []

    filters = jsondict['mongo_query']['filters']

    files = []
    for row in jsondict['data']:
        # First item yielded for the 'grep' filters, i.e. file.name.
        name = list(get_value(row, filters['grep']))[0]
        if len(name) > 0 and name not in files:
            files.append(name)

    return files
174 
175 
def fullCPMpath():
    """Return the path of copyPickMerge_cfg.py in the current CMSSW setup.

    Looks under $CMSSW_BASE first and then under $CMSSW_RELEASE_BASE.

    Raises RuntimeError if CMSSW_BASE is not set or the file exists in
    neither location.
    """
    base = os.environ.get('CMSSW_BASE')
    if not base:
        raise RuntimeError("CMSSW Environment not set")
    relpath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    for root in (base, os.environ.get('CMSSW_RELEASE_BASE')):
        if not root:
            # Release area not configured; nothing to look at.
            continue
        retval = "%s/%s" % (root, relpath)
        if os.path.exists(retval):
            return retval
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
190 
def guessEmail():
    """Guess the user's e-mail address as '<whoami>@<domain>'.

    The domain is the last two dot-separated components of the output of
    the ``hostname`` command.
    """
    return '%s@%s' % (commands.getoutput('whoami'),
                      '.'.join(commands.getoutput('hostname').split('.')[-2:]))
194 
def setupCrabDict(options):
    """Build the substitution dictionary for the CRAB configuration.

    options -- parsed command-line options; ``base``, ``email`` and
               ``crabCondor`` are read.

    Returns a dict holding the output file names derived from
    ``options.base``, the copyPickMerge configuration path, the dataset,
    the e-mail address, a timestamp used as work area and the scheduler.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    return {
        'runEvent': '%s_runEvents.txt' % prefix,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % prefix,
        'crabcfg': '%s_crab.py' % prefix,
        'json': '%s.json' % prefix,
        'dataset': Event.dataset,
        'email': options.email,
        'WorkArea': timestamp,
        # Condor when requested, remote glidein otherwise.
        'scheduler': 'condor' if options.crabCondor else 'remoteGlidein',
        'useServer': '',
    }
215 
# crab template
#
# Text of the CRAB 3 configuration file written by the main program via
# ``crabTemplate % crabDict``.  It uses %-style named placeholders:
# WorkArea, copyPickMerge, runEvent, output, dataset and json.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
260 
261 ########################
262 ## ################## ##
263 ## ## Main Program ## ##
264 ## ################## ##
265 ########################
266 
if __name__ == "__main__":
    # Script entry point.
    #  1. Parse options; the first positional argument is the dataset, the
    #     rest are either run:lumi:event triples or a single file of them.
    #  2. Build the list of Event objects.
    #  3. Either write the CRAB inputs (lumi-mask JSON, run:event list and
    #     CRAB configuration) or query DAS for the files and print / run
    #     an edmCopyPickMerge command.
    email = guessEmail()
    parser = optparse.OptionParser("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option('--output', dest='base', type='string',
                      default='pickevents',
                      help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option('--runInteractive', dest='runInteractive', action='store_true',
                      help='Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option('--printInteractive', dest='printInteractive', action='store_true',
                      help='Print "cmsRun" command instead of running it.')
    parser.add_option('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                      default=20,
                      help='Maximum number of events allowed to be processed interactively.')
    parser.add_option('--crab', dest='crab', action='store_true',
                      help='Force CRAB setup instead of interactive mode')
    parser.add_option('--crabCondor', dest='crabCondor', action='store_true',
                      help='Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option('--email', dest='email', type='string',
                      default='',
                      help="Specify email for CRAB (default '%s')" % email)
    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    commentRE = re.compile(r'#.+$')
    colonRE = re.compile(r':')
    eventList = []
    if len(args) > 1 or colonRE.search(args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event(piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append(event)
    else:
        # read events from file
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub('', line)
                try:
                    event = Event(line)
                except RuntimeError:
                    # Event signals every parse problem as RuntimeError.
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined. Aborting.")
        sys.exit()

    # Too many events to process locally: fall back to CRAB.
    if len(eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [(event.run, event.lumi) for event in eventList]
        # Not named ``json``: that would rebind the imported json module.
        lumiList = LumiList(lumis=runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted(["%d:%d" % (event.run, event.event) for event in eventList]))
        crabDict = setupCrabDict(options)
        lumiList.writeJSON(crabDict['json'])
        with open(crabDict['runEvent'], 'w') as target:
            target.write("%s\n" % eventsToProcess)
        # ``with`` guarantees the close; the original wrote ``target.close``
        # without parentheses, so the file was never explicitly closed.
        with open(crabDict['crabcfg'], 'w') as target:
            target.write(crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event)
            if eventFiles == ['[]']:  # event not contained in the input dataset
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping." % (event.run, event.lumi, event.event, event.dataset))
                eventPurgeList.append(event)
            else:
                files.extend(eventFiles)
        # Purge events
        for event in eventPurgeList:
            eventList.remove(event)
        # Purge duplicate files
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add(filename)
            uniqueFiles.append(filename)
        inputFiles = ','.join(uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted(["%d:%d" % (event.run, event.event) for event in eventList]))
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system(command)
390 
Event helper class ##.
def getFileNames
Subroutines ##.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139