14 from FWCore.PythonUtilities.LumiList
import LumiList
16 from pprint
import pprint
17 from datetime
import datetime
18 from subprocess
import Popen,PIPE
19 from types
import GeneratorType
23 edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2
27 edmPickEvent.py dataset listOfEvents.txt
29 listOfEvents is a text file:
30 # this line is ignored as a comment
31 # since '#' is a valid comment character
32 run1 lumi_section1 event1
33 run2 lumi_section2 event2
41 run, lumi_section, and event are integers that you can get from
44 dataset: it just a name of the physics dataset, if you don't know exact name
45 you can provide a mask, e.g.: *QCD*RAW
47 For updated information see Wiki:
48 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
53 "Convert given timestamp into human readable format"
54 if isinstance(val, int)
or isinstance(val, float):
55 return time.strftime(
'%d/%b/%Y_%H:%M:%S_GMT', time.gmtime(val))
60 Format file size utility, it converts file size into KB, MB, GB, TB, PB units
66 except Exception
as _exc:
70 xlist = [
'',
'KiB',
'MiB',
'GiB',
'TiB',
'PiB']
73 xlist = [
'',
'KB',
'MB',
'GB',
'TB',
'PB']
76 return "%3.1f%s" % (num, xxx)
80 """Generator which extracts row[key] value"""
81 if isinstance(row, dict)
and key
in row:
82 if key ==
'creation_time':
89 if isinstance(row, list)
or isinstance(row, GeneratorType):
96 """Filter data from a row for given list of filters"""
98 if ftr.find(
'>') != -1
or ftr.find(
'<') != -1
or ftr.find(
'=') != -1:
102 keys = ftr.split(
'.')
106 values += [json.dumps(i)
for i
in val]
123 splitRE = re.compile (
r'[\s:,]+')
125 pieces = Event.splitRE.split (line.strip())
127 self[
'run'] = int( pieces[0] )
128 self[
'lumi'] = int( pieces[1] )
129 self[
'event'] = int( pieces[2] )
130 self[
'dataset'] = Event.dataset
132 raise RuntimeError(
"Can not parse '%s' as Event object" \
134 if not self[
'dataset']:
135 print "No dataset is defined for '%s'. Aborting." % line.strip()
136 raise RuntimeError(
'Missing dataset')
142 return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
152 query =
"file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
153 p = Popen(
'das_client --format json --query "%s"'%(query), stdout=PIPE,shell=
True)
155 tupleP = os.waitpid(p.pid, 0)
157 jsondict = json.loads(pipe)
158 status = jsondict[
'status']
160 print "DAS query status: %s"%(status)
163 mongo_query = jsondict[
'mongo_query']
164 filters = mongo_query[
'filters']
165 data = jsondict[
'data']
169 file = [r
for r
in get_value(row, filters[
'grep'])][0]
170 if len(file) > 0
and not file
in files:
177 base = os.environ.get (
'CMSSW_BASE')
179 raise RuntimeError(
"CMSSW Environment not set")
180 retval =
"%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
182 if os.path.exists (retval):
184 base = os.environ.get (
'CMSSW_RELEASE_BASE')
185 retval =
"%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
187 if os.path.exists (retval):
189 raise RuntimeError(
"Could not find copyPickMerge_cfg.py")
192 return '%s@%s' % (commands.getoutput (
'whoami'),
193 '.'.
join(commands.getoutput(
'hostname').
split(
'.')[-2:]))
196 date = datetime.now().strftime(
'%Y%m%d_%H%M%S')
199 crab[
'runEvent'] =
'%s_runEvents.txt' % base
201 crab[
'output'] =
'%s.root' % base
202 crab[
'crabcfg'] =
'%s_crab.py' % base
203 crab[
'json'] =
'%s.json' % base
204 crab[
'dataset'] = Event.dataset
205 crab[
'email'] = options.email
206 crab[
'WorkArea'] = date
207 if options.crabCondor:
208 crab[
'scheduler'] =
'condor'
211 crab[
'scheduler'] =
'remoteGlidein'
213 crab[
'useServer'] =
''
218 ## Edited By Raman Khurana
220 ## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
222 ## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
224 ## Once you are happy with this file, please run
227 ## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:
229 from WMCore.Configuration import Configuration
230 config = Configuration()
232 ## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
233 config.section_("General")
234 config.General.requestName = 'pickEvents'
235 config.General.workArea = 'crab_pickevents_%(WorkArea)s'
238 config.section_("JobType")
239 config.JobType.pluginName = 'Analysis'
240 config.JobType.psetName = '%(copyPickMerge)s'
241 config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']
243 config.section_("Data")
244 config.Data.inputDataset = '%(dataset)s'
246 config.Data.inputDBS = 'global'
247 config.Data.splitting = 'LumiBased'
248 config.Data.unitsPerJob = 5
249 config.Data.lumiMask = '%(json)s'
250 #config.Data.publication = True
251 #config.Data.publishDbsUrl = 'phys03'
252 #config.Data.publishDataName = 'CRAB3_CSA_DYJets'
253 #config.JobType.allowNonProductionCMSSW=True
255 config.section_("Site")
256 ## Change site name accordingly
257 config.Site.storageSite = "T2_US_Wisconsin"
267 if __name__ ==
"__main__":
269 parser = optparse.OptionParser (
"Usage: %prog [options] dataset events_or_events.txt", description=
'''This program
270 facilitates picking specific events from a data set. For full details, please visit
271 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
272 parser.add_option (
'--output', dest=
'base', type=
'string',
273 default=
'pickevents',
274 help=
'Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
275 parser.add_option (
'--runInteractive', dest=
'runInteractive', action=
'store_true',
276 help =
'Call "cmsRun" command if possible. Can take a long time.')
277 parser.add_option (
'--printInteractive', dest=
'printInteractive', action=
'store_true',
278 help =
'Print "cmsRun" command instead of running it.')
279 parser.add_option (
'--maxEventsInteractive', dest=
'maxEventsInteractive', type=
'int',
281 help =
'Maximum number of events allowed to be processed interactively.')
282 parser.add_option (
'--crab', dest=
'crab', action=
'store_true',
283 help =
'Force CRAB setup instead of interactive mode')
284 parser.add_option (
'--crabCondor', dest=
'crabCondor', action=
'store_true',
285 help =
'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
286 parser.add_option (
'--email', dest=
'email', type=
'string',
288 help=
"Specify email for CRAB (default '%s')" % email )
289 (options, args) = parser.parse_args()
296 if not options.email:
297 options.email = email
299 Event.dataset = args.pop(0)
300 commentRE = re.compile (
r'#.+$')
301 colonRE = re.compile (
r':')
303 if len (args) > 1
or colonRE.search (args[0]):
307 event = Event (piece)
309 raise RuntimeError(
"'%s' is not a proper event" % piece)
310 eventList.append (event)
313 source = open(args[0],
'r')
315 line = commentRE.sub (
'', line)
319 print "Skipping '%s'." % line.strip()
321 eventList.append(event)
325 print "No events defined. Aborting."
328 if len (eventList) > options.maxEventsInteractive:
336 if options.runInteractive:
337 raise RuntimeError(
"This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
338 runsAndLumis = [ (event.run, event.lumi)
for event
in eventList]
339 json = LumiList (lumis = runsAndLumis)
341 sorted( [
"%d:%d" % (event.run, event.event)
for event
in eventList ] ) )
342 crabDict = setupCrabDict (options)
343 json.writeJSON (crabDict[
'json'])
344 target = open (crabDict[
'runEvent'],
'w')
345 target.write (
"%s\n" % eventsToProcess)
347 target = open (crabDict[
'crabcfg'],
'w')
348 target.write (crabTemplate % crabDict)
350 print "Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n"
351 if options.crabCondor:
352 print "You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n"
353 if not os.path.exists (
'%s/.profile' % os.environ.get(
'HOME')):
354 print "** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n"
355 print "Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict
364 for event
in eventList:
365 eventFiles = getFileNames (event)
366 if eventFiles == [
'[]']:
367 print "** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset)
368 eventPurgeList.append( event )
370 files.extend( eventFiles )
372 for event
in eventPurgeList:
373 eventList.remove( event )
377 for filename
in files:
378 if filename
in fileSet:
380 fileSet.add (filename)
381 uniqueFiles.append (filename)
382 source =
','.join (uniqueFiles) +
'\n'
383 eventsToProcess =
','.
join(\
384 sorted( [
"%d:%d" % (event.run, event.event)
for event
in eventList ] ) )
385 command =
'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
386 % (options.base, eventsToProcess, source)
387 print "\n%s" % command
388 if options.runInteractive
and not options.printInteractive:
def getFileNames
Subroutines ##.
static std::string join(char **cmd)