11 from argparse
import ArgumentParser, ArgumentDefaultsHelpFormatter
14 from FWCore.PythonUtilities.LumiList
import LumiList
16 from pprint
import pprint
17 from datetime
import datetime
19 import Utilities.General.cmssw_das_client
as das_client
23 edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2 27 edmPickEvent.py dataset listOfEvents.txt 29 listOfEvents is a text file: 30 # this line is ignored as a comment 31 # since '#' is a valid comment character 32 run1 lumi_section1 event1 33 run2 lumi_section2 event2 41 run, lumi_section, and event are integers that you can get from 44 dataset: it just a name of the physics dataset, if you don't know exact name 45 you can provide a mask, e.g.: *QCD*RAW 47 For updated information see Wiki: 48 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents 59 splitRE = re.compile (
r'[\s:,]+')
61 pieces = Event.splitRE.split (line.strip())
63 self[
'run'] =
int( pieces[0] )
64 self[
'lumi'] =
int( pieces[1] )
65 self[
'event'] =
int( pieces[2] )
66 self[
'dataset'] = Event.dataset
68 raise RuntimeError(
"Can not parse '%s' as Event object" \
70 if not self[
'dataset']:
71 print(
"No dataset is defined for '%s'. Aborting." % line.strip())
72 raise RuntimeError(
'Missing dataset')
78 return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
86 """Return files for given DAS query""" 87 if client ==
'das_client':
89 elif client ==
'dasgoclient':
92 for path
in os.getenv(
'PATH').
split(
':'):
93 if os.path.isfile(os.path.join(path,
'dasgoclient')):
98 """Return files for given DAS query via das_client""" 101 query =
"file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
103 status = jsondict[
'status']
105 print(
"DAS query status: %s"%(status))
108 mongo_query = jsondict[
'mongo_query']
109 filters = mongo_query[
'filters']
110 data = jsondict[
'data']
115 if len(file) > 0
and not file
in files:
121 """Return files for given DAS query via dasgoclient""" 122 query =
"file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
123 cmd = [
'dasgoclient',
'-query', query,
'-json']
124 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
126 err = proc.stderr.read()
128 print(
"DAS error: %s" % err)
129 print(proc.stdout.read())
132 dasout = proc.stdout.read()
134 for row
in json.loads(dasout):
135 for rec
in row.get(
'file', []):
136 fname = rec.get(
'name',
'')
145 base = os.environ.get (
'CMSSW_BASE')
147 raise RuntimeError(
"CMSSW Environment not set")
148 retval =
"%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
150 if os.path.exists (retval):
152 base = os.environ.get (
'CMSSW_RELEASE_BASE')
153 retval =
"%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
155 if os.path.exists (retval):
157 raise RuntimeError(
"Could not find copyPickMerge_cfg.py")
160 return '%s@%s' % (subprocess.getoutput (
'whoami'),
161 '.'.
join(subprocess.getoutput(
'hostname').
split(
'.')[-2:]))
164 date = datetime.now().strftime(
'%Y%m%d_%H%M%S')
167 crab[
'runEvent'] =
'%s_runEvents.txt' % base
169 crab[
'output'] =
'%s.root' % base
170 crab[
'crabcfg'] =
'%s_crab.py' % base
171 crab[
'json'] =
'%s.json' % base
172 crab[
'dataset'] = Event.dataset
173 crab[
'email'] = options.email
174 crab[
'WorkArea'] = date
175 if options.crabCondor:
176 crab[
'scheduler'] =
'condor' 179 crab[
'scheduler'] =
'remoteGlidein' 181 crab[
'useServer'] =
'' 186 ## Edited By Raman Khurana 188 ## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab 190 ## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters 192 ## Once you are happy with this file, please run 195 ## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library: 197 from WMCore.Configuration import Configuration 198 config = Configuration() 200 ## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters 201 config.section_("General") 202 config.General.requestName = 'pickEvents' 203 config.General.workArea = 'crab_pickevents_%(WorkArea)s' 206 config.section_("JobType") 207 config.JobType.pluginName = 'Analysis' 208 config.JobType.psetName = '%(copyPickMerge)s' 209 config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s'] 211 config.section_("Data") 212 config.Data.inputDataset = '%(dataset)s' 214 config.Data.inputDBS = 'global' 215 config.Data.splitting = 'LumiBased' 216 config.Data.unitsPerJob = 5 217 config.Data.lumiMask = '%(json)s' 218 #config.Data.publication = True 219 #config.Data.publishDbsUrl = 'phys03' 220 #config.Data.publishDataName = 'CRAB3_CSA_DYJets' 221 #config.JobType.allowNonProductionCMSSW=True 223 config.section_("Site") 224 ## Change site name accordingly 225 config.Site.storageSite = "T2_US_Wisconsin" 235 if __name__ ==
"__main__":
237 parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description=
'''This program 238 facilitates picking specific events from a data set. For full details, please visit 239 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents''')
240 parser.add_argument(
'--output', dest=
'base', type=str,
241 default=
'pickevents',
242 help=
'Base name to use for output files (root, JSON, run and event list, etc.)")')
243 parser.add_argument(
'--runInteractive', dest=
'runInteractive', action=
'store_true',
244 help =
'Call "cmsRun" command if possible. Can take a long time.')
245 parser.add_argument(
'--printInteractive', dest=
'printInteractive', action=
'store_true',
246 help =
'Print "cmsRun" command instead of running it.')
247 parser.add_argument(
'--maxEventsInteractive', dest=
'maxEventsInteractive', type=int,
249 help =
'Maximum number of events allowed to be processed interactively.')
250 parser.add_argument(
'--crab', dest=
'crab', action=
'store_true',
251 help =
'Force CRAB setup instead of interactive mode')
252 parser.add_argument(
'--crabCondor', dest=
'crabCondor', action=
'store_true',
253 help =
'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
254 parser.add_argument(
'--email', dest=
'email', type=str,
256 help=
"Specify email for CRAB")
258 parser.add_argument(
'--das-client', dest=
'das_cli', type=str,
260 help=
"Specify das client to use")
261 parser.add_argument(
"dataset", type=str)
262 parser.add_argument(
"events", metavar=
"events_or_events.txt", type=str, nargs=
'+')
263 options = parser.parse_args()
265 Event.dataset = options.dataset
266 commentRE = re.compile (
r'#.+$')
267 colonRE = re.compile (
r':')
269 if len (options.events) > 1
or colonRE.search (options.events[0]):
271 for piece
in options.events:
273 event = Event (piece)
275 raise RuntimeError(
"'%s' is not a proper event" % piece)
276 eventList.append (event)
279 source = open(options.events[0],
'r') 281 line = commentRE.sub (
'', line)
285 print(
"Skipping '%s'." % line.strip())
287 eventList.append(event)
291 print(
"No events defined. Aborting.")
294 if len (eventList) > options.maxEventsInteractive:
302 if options.runInteractive:
303 raise RuntimeError(
"This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
304 runsAndLumis = [ (event.run, event.lumi)
for event
in eventList]
305 json = LumiList (lumis = runsAndLumis)
307 sorted( [
"%d:%d" % (event.run, event.event)
for event
in eventList ] ) )
308 crabDict = setupCrabDict (options)
309 json.writeJSON (crabDict[
'json'])
310 target = open (crabDict[
'runEvent'],
'w')
311 target.write (
"%s\n" % eventsToProcess)
313 target = open (crabDict[
'crabcfg'],
'w')
314 target.write (crabTemplate % crabDict)
316 print(
"Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
317 if options.crabCondor:
318 print(
"You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
319 if not os.path.exists (
'%s/.profile' % os.environ.get(
'HOME')):
320 print(
"** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
321 print(
"Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)
330 for event
in eventList:
332 if eventFiles == [
'[]']:
333 print(
"** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
334 eventPurgeList.append( event )
336 files.extend( eventFiles )
338 for event
in eventPurgeList:
339 eventList.remove( event )
343 for filename
in files:
344 if filename
in fileSet:
346 fileSet.add (filename)
347 uniqueFiles.append (filename)
348 source =
','.join (uniqueFiles) +
'\n' 349 eventsToProcess =
','.
join(\
350 sorted( [
"%d:%d" % (event.run, event.event)
for event
in eventList ] ) )
351 command =
'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
352 % (options.base, eventsToProcess, source)
353 print(
"\n%s" % command)
354 if options.runInteractive
and not options.printInteractive:
def get_value(data, filters, base=10)
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None, cert=None, capath=None, qcache=0, das_headers=True)
def getFileNames_dasgoclient(event)
def getFileNames(event, client=None)
Subroutines ##.
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
def __getattr__(self, key)
def split(sequence, size)
static std::string join(char **cmd)
def getFileNames_das_client(event)
def __init__(self, line, kwargs)
def setupCrabDict(options)