CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
edmPickEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Anzar Afaq June 17, 2008
4 # Oleksiy Atramentov June 21, 2008
5 # Charles Plager Sept 7, 2010
6 # Volker Adler Apr 16, 2014
7 
8 import os
9 import sys
10 import optparse
11 import re
12 import commands
13 from FWCore.PythonUtilities.LumiList import LumiList
14 import das_client
15 import json
16 from pprint import pprint
17 
18 
# Usage text for this script.  The module-level name is kept as-is for
# backward compatibility even though it shadows the built-in help().
help = """
How to use:

edmPickEvents.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvents.py dataset listOfEvents.txt


listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just the name of the physics dataset; if you don't know the
 exact name you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
50 
51 
52 ########################
53 ## Event helper class ##
54 ########################
55 
class Event (dict):
    """Dictionary describing one event to pick.

    The constructor fills four keys:
      'run', 'lumi', 'event' -- integers parsed from the input line
      'dataset'              -- the value of the class attribute
                                Event.dataset at construction time

    Every key can also be read as an attribute (e.g. ``evt.run``).
    """

    # Dataset name shared by all events; it has to be assigned
    # (Event.dataset = ...) before the first Event is constructed.
    dataset = None
    # Fields may be separated by any run of whitespace, ':' or ','.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` ("run lumi event", "run:lumi:event", ...).

        Only the first three fields are used; extra fields are ignored.
        Keyword arguments are accepted but not used.

        Raises RuntimeError if the first three fields are not integers
        (or are missing), or if Event.dataset has not been set.
        """
        text = line.strip()
        pieces = Event.splitRE.split (text)
        try:
            self['run']   = int( pieces[0] )
            self['lumi']  = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (ValueError, IndexError):
            # ValueError: a field is not an integer;
            # IndexError: fewer than three fields.
            raise RuntimeError ("Can not parse '%s' as Event object" % text)
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'. Aborting." % text)
            raise RuntimeError ('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        Unknown names raise AttributeError (not KeyError) so that
        hasattr() and getattr() with a default keep working.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError (key)

    def __str__ (self):
        """Return a one-line, human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
80 
81 #################
82 ## Subroutines ##
83 #################
84 
def getFileNames (event):
    """Ask DAS for the files holding the run/lumi section of ``event``.

    ``event`` must provide the keys 'dataset', 'run' and 'lumi', which
    are substituted into the DAS query.  Returns the list of unique,
    non-empty values extracted from the reply, in the order received.
    If DAS does not answer with status 'ok', the status is printed and
    an empty list is returned.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    # NOTE(review): the trailing 0, 0, False are presumably idx, limit and
    # debug -- confirm against das_client.get_data.
    reply = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = reply['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    filters = reply['mongo_query']['filters']
    rows = reply['data']

    found = []
    for row in rows:
        # Only the first value produced for the 'grep' filter is used.
        name = list(das_client.get_value(row, filters['grep']))[0]
        if len(name) == 0 or name in found:
            continue
        found.append(name)

    return found
106 
107 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked for under $CMSSW_BASE first and under
    $CMSSW_RELEASE_BASE second; the first existing path is returned.

    Raises RuntimeError if $CMSSW_BASE is not set, or if the file
    exists in neither area.
    """
    relative = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    localBase = os.environ.get ('CMSSW_BASE')
    if not localBase:
        raise RuntimeError ("CMSSW Environment not set")
    for base in (localBase, os.environ.get ('CMSSW_RELEASE_BASE')):
        if not base:
            # An unset release area is skipped instead of being
            # formatted into a bogus "None/src/..." path.
            continue
        candidate = "%s/%s" % (base, relative)
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError ("Could not find copyPickMerge_cfg.py")
122 
def guessEmail():
    """Guess the user's e-mail address as ``<user>@<domain>``.

    <user> is the output of the ``whoami`` command and <domain> is made
    of the last two dot-separated components of the ``hostname`` output.
    """
    user = commands.getoutput ('whoami')
    host = commands.getoutput ('hostname')
    domain = '.'.join (host.split ('.')[-2:])
    return '%s@%s' % (user, domain)
126 
127 
def setupCrabDict (options):
    """Build the substitution dictionary used to fill crabTemplate.

    Reads ``options.base`` (stem of every generated file name),
    ``options.email`` and ``options.crabCondor`` (selects the 'condor'
    scheduler instead of 'remoteGlidein').  The dataset is taken from
    Event.dataset and the pset path from fullCPMpath().
    """
    stem = options.base
    return {
        'runEvent'      : '%s_runEvents.txt' % stem,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % stem,
        'crabcfg'       : '%s_crab.config' % stem,
        'json'          : '%s.json' % stem,
        'dataset'       : Event.dataset,
        'email'         : options.email,
        'scheduler'     : 'condor' if options.crabCondor else 'remoteGlidein',
        # The 'use_server' line is left empty for both schedulers.
        'useServer'     : '',
    }
146 
147 
# CRAB configuration template.  It is filled with the '%' operator and
# needs the keys: crabcfg, runEvent, output, json, copyPickMerge,
# dataset, email, scheduler and useServer.
crabTemplate = '''
# CRAB documentation:
# https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrab
#
# Once you are happy with this file, please run
# crab -create -cfg %(crabcfg)s
# crab -submit -cfg %(crabcfg)s

[CMSSW]
pycfg_params = eventsToProcess_load=%(runEvent)s outputFile=%(output)s

lumi_mask = %(json)s
total_number_of_lumis = -1
lumis_per_job = 1
pset = %(copyPickMerge)s
datasetpath = %(dataset)s
output_file = %(output)s

[USER]
return_data = 1
email = %(email)s

# if you want to copy the data or put it in a storage element, do it
# here.


[CRAB]
# use "remoteGlidein" in general; you can use "condor" if you run on CAF at
# FNAL or an OSG site AND you know the files are available locally
scheduler = %(scheduler)s
jobtype = cmssw
%(useServer)s
'''
182 
183 
184 ########################
185 ## ################## ##
186 ## ## Main Program ## ##
187 ## ################## ##
188 ########################
189 
if __name__ == "__main__":
    # Guess a default e-mail address first so it can be quoted in --help.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMS/PickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()

    # At least the dataset plus one event (or one event-list file) is needed.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # The first positional argument is the dataset; it is shared by all events.
    Event.dataset = args.pop(0)
    commentRE = re.compile (r'#.+$')
    colonRE = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError ("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file; 'with' closes it even if an error occurs
        with open (args[0], 'r') as eventFile:
            for line in eventFile:
                line = commentRE.sub ('', line)
                try:
                    event = Event (line)
                except RuntimeError:
                    # Event signals unparsable lines with RuntimeError.
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append (event)

    # More than 20 events always go through CRAB.
    if len (eventList) > 20:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError ("This job is can not be run interactive, but rather by crab. Please call without '--runInteractive' flag.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named lumiJson so the imported 'json' module is not shadowed.
        lumiJson = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join (
            sorted ( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiJson.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        # The original never closed this file ('target.close' without
        # parentheses); the with-block guarantees it is flushed and closed.
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB. Then edit %(crabcfg)s to make any desired changed. The run:\n\ncrab -create -cfg %(crabcfg)s\ncrab -submit\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        # Check for an empty list before querying DAS.
        if not eventList:
            print("No events defind. Aborting.")
            sys.exit()
        files = []
        for event in eventList:
            files.extend( getFileNames (event) )
        # Purge duplicate files, keeping the order of first appearance
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join (
            sorted ( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        # The command is always printed; it is only executed when
        # --runInteractive is given without --printInteractive.
        # NOTE(review): the string is handed to the shell unescaped.
        if options.runInteractive and not options.printInteractive:
            os.system (command)
300 
Event helper class ##.
def getFileNames
Subroutines ##.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139
void set(const std::string &name, int value)
set the flag, with a run-time name