CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixInjector.py
Go to the documentation of this file.
import copy
import json
import multiprocessing
import os
import sys
6 
def performInjectionOptionTest(opt):
    """Check the injection-related options on *opt* and adjust them in place.

    NOTE(review): the ``def`` line was missing from this listing; the name is
    taken from the file's index and the single parameter from the body's use
    of ``opt`` -- confirm against the upstream file.

    Reads ``opt.show``, ``opt.wmcontrol`` and ``opt.nProcs``; may set
    ``opt.nProcs`` and ``opt.dryRun``.

    Raises:
        SystemExit: with code -1 when ``opt.show`` is set, or when
            ``opt.wmcontrol`` is ``'submit'`` while ``opt.nProcs`` is 0.
    """
    if opt.show:
        print('Not injecting to wmagent in --show mode. Need to run the worklfows.')
        sys.exit(-1)

    control = opt.wmcontrol
    if control == 'init':
        # init means it'll be in test mode
        opt.nProcs = 0
    elif control == 'test':
        # means the wf were created already, and we just dryRun it.
        opt.dryRun = True
    elif control == 'submit':
        if opt.nProcs == 0:
            print('Not injecting to wmagent in -j 0 mode. Need to run the worklfows.')
            sys.exit(-1)
    elif control == 'force':
        print("This is an expert setting, you'd better know what you're doing")
        opt.dryRun = True
23 
def upload_to_couch_oneArg(arguments):
    """Upload one configuration to couch, taking all inputs as a single tuple.

    Args:
        arguments: a 5-item sequence
            ``(filePath, labelInCouch, user, group, where)``; ``where`` is
            passed to the uploader as its ``url`` keyword.

    Returns:
        Whatever ``modules.wma.upload_to_couch`` returns (used by the caller
        as the cache id).
    """
    # Imported here, not at module level, so the module can be loaded
    # without the wmcontrol package being available.
    from modules.wma import upload_to_couch

    filePath, labelInCouch, user, group, where = arguments
    return upload_to_couch(filePath, labelInCouch, user, group,
                           test_mode=False, url=where)
34 
35 
37 
class MatrixInjector(object):
    """Build, upload and submit TaskChain request dictionaries for workflows.

    NOTE(review): the ``class`` line was missing from this listing; the name
    is taken from the file name and the base class is assumed -- confirm
    against the upstream file.

    Typical use is ``prepare()`` (fills ``self.chainDicts``), then
    ``upload()`` (replaces configuration paths by couch ids), then
    ``submit()``.
    """

    def __init__(self, opt, mode='init', options=''):
        """Set up hosts, labels and the template dictionaries.

        Args:
            opt: options object; ``keep``, ``testbed``, ``label`` and
                ``nThreads`` are read from it.
            mode: any value other than ``'submit'`` or ``'force'`` puts the
                injector in test mode (fake uploads, no submission).
            options: comma-separated ``dqm:<url>`` / ``wma:<host>`` overrides.

        Raises:
            SystemExit: with code -18 when ``WMCORE_ROOT`` is not set and the
                injector is not in test mode.
        """
        self.count = 1040

        self.dqmgui = None
        self.wmagent = None
        for k in options.split(','):
            if k.startswith('dqm:'):
                self.dqmgui = k.split(':', 1)[-1]
            elif k.startswith('wma:'):
                self.wmagent = k.split(':', 1)[-1]

        self.testMode = ((mode != 'submit') and (mode != 'force'))
        self.version = 1
        self.keep = opt.keep

        # wmagent stuff
        if opt.testbed:
            defaultHost = 'cmsweb-testbed.cern.ch'
            dbsPath = '/dbs/int/global/DBSReader'
        else:
            defaultHost = 'cmsweb.cern.ch'
            dbsPath = '/dbs/prod/global/DBSReader'
        if not self.wmagent:
            self.wmagent = os.getenv('WMAGENT_REQMGR')
        if not self.wmagent:
            self.wmagent = defaultHost
        # Always define DbsUrl: it used to be assigned only when the agent
        # host fell back to the default, so an explicit 'wma:' option or
        # WMAGENT_REQMGR made the defaultChain construction below fail.
        # NOTE(review): with an explicit agent host the DBS reader is taken
        # from the standard host for the chosen flavour -- confirm.
        self.DbsUrl = "https://" + defaultHost + dbsPath

        if not self.dqmgui:
            self.dqmgui = "https://cmsweb.cern.ch/dqm/relval"
        # couch stuff
        self.couch = 'https://' + self.wmagent + '/couchdb'
        self.couchCache = {}  # so that we do not upload like crazy, and recycle cfgs
        self.user = os.getenv('USER')
        self.group = 'ppd'
        cmsswVersion = os.getenv('CMSSW_VERSION')
        self.label = 'RelValSet_' + cmsswVersion.replace('-', '') + '_v' + str(self.version)
        self.speciallabel = ''
        if opt.label:
            self.speciallabel = '_' + opt.label

        if not os.getenv('WMCORE_ROOT'):
            print('\n\twmclient is not setup properly. Will not be able to upload or submit requests.\n')
            if not self.testMode:
                print('\n\t QUIT\n')
                sys.exit(-18)
        else:
            print('\n\tFound wmclient\n')

        # Template for the request as a whole; deep-copied once per workflow.
        self.defaultChain = {
            "RequestType": "TaskChain",  # this is how we handle relvals
            "SubRequestType": "RelVal",  # now that TaskChain is also used for central MC production
            "RequestPriority": 500000,
            "Requestor": self.user,  # Person responsible
            "Group": self.group,  # group for the request
            "CMSSWVersion": cmsswVersion,  # CMSSW Version (used for all tasks in chain)
            "Campaign": cmsswVersion,  # only for wmstat purpose
            "ScramArch": os.getenv('SCRAM_ARCH'),  # Scram Arch (used for all tasks in chain)
            "ProcessingVersion": self.version,  # Processing Version (used for all tasks in chain)
            "GlobalTag": None,  # Global Tag (overridden per task)
            "CouchURL": self.couch,  # URL of CouchDB containing Config Cache
            "ConfigCacheURL": self.couch,  # URL of CouchDB containing Config Cache
            "DbsUrl": self.DbsUrl,
            "TaskChain": None,  # Define number of tasks in chain.
            "nowmTasklist": [],  # a list of tasks as we put them in
            "unmergedLFNBase": "/store/unmerged",
            "mergedLFNBase": "/store/relval",
            "dashboardActivity": "relval",
            "Multicore": opt.nThreads,
            "Memory": 3000,
            "SizePerEvent": 1234,
            "TimePerEvent": 0.1,
        }

        # Extra request-level keys merged in when the chain has a HARVEST task.
        self.defaultHarvest = {
            "EnableHarvesting": "True",
            "DQMUploadUrl": self.dqmgui,
            "DQMConfigCacheID": None,
        }

        # Template for a first task that generates events from scratch.
        self.defaultScratch = {
            "TaskName": None,  # Task Name
            "ConfigCacheID": None,  # Generator Config id
            "GlobalTag": None,
            "SplittingAlgo": "EventBased",  # Splitting Algorithm
            "EventsPerJob": None,  # Size of jobs in terms of splitting algorithm
            "RequestNumEvents": None,  # Total number of events to generate
            "Seeding": "AutomaticSeeding",  # Random seeding method
            "PrimaryDataset": None,  # Primary Dataset to be created
            "nowmIO": {},
            "KeepOutput": False,
        }
        # Template for a task that reads an existing input dataset.
        self.defaultInput = {
            "TaskName": "DigiHLT",  # Task Name
            "ConfigCacheID": None,  # Processing Config id
            "GlobalTag": None,
            "InputDataset": None,  # Input Dataset to be processed
            "SplittingAlgo": "LumiBased",  # Splitting Algorithm
            "LumisPerJob": 10,  # Size of jobs in terms of splitting algorithm
            "nowmIO": {},
            "KeepOutput": False,
        }
        # Template for a task fed by the output of a previous task.
        self.defaultTask = {
            "TaskName": None,  # Task Name
            "InputTask": None,  # Input Task Name (Task Name field of a previous Task entry)
            "InputFromOutputModule": None,  # OutputModule name in the input task that will provide files to process
            "ConfigCacheID": None,  # Processing Config id
            "GlobalTag": None,
            "SplittingAlgo": "LumiBased",  # Splitting Algorithm
            "LumisPerJob": 10,  # Size of jobs in terms of splitting algorithm
            "nowmIO": {},
            "KeepOutput": False,
        }

        self.chainDicts = {}

    @staticmethod
    def _loadStepIO(directory, step):
        """Return the parsed ``<directory>/<step>.io`` JSON, or None on failure."""
        ioPath = '%s/%s.io' % (directory, step)
        try:
            with open(ioPath) as ioFile:
                return json.load(ioFile)
        except (IOError, OSError, ValueError):
            print("Failed to find %s .The workflows were probably not run on cfg not created" % (ioPath,))
            return None

    @staticmethod
    def _argAfter(command, flag):
        """Return the whitespace-separated token that follows *flag* in *command*."""
        tokens = command.split()
        return tokens[tokens.index(flag) + 1]

    def prepare(self, mReader, directories, mode='init'):
        """Build one request dictionary per workflow into ``self.chainDicts``.

        NOTE(review): the block nesting below was reconstructed from a
        listing that had lost its indentation -- confirm against upstream.

        Args:
            mReader: object whose ``workFlowSteps`` maps ``(num, prefix)`` to
                ``(num, name, commands, stepList)``.
            directories: maps a workflow number to the directory holding its
                ``<step>.io`` and ``<step>.py`` files.
            mode: unused here; kept for interface compatibility.

        Returns:
            0 on success, -15 when a step's ``.io`` file cannot be read,
            -12 when a from-scratch first step has no ``--relval`` option.
        """
        # Per-step LumisPerJob overrides.
        # (was: from Configuration.PyReleaseValidation.relval_steps import wmsplit)
        wmsplit = {
            'DIGIHI': 5,
            'RECOHI': 5,
            'HLTD': 5,
            'RECODreHLT': 2,
            'DIGIPU': 4,
            'DIGIPU1': 4,
            'RECOPU1': 1,
            'DIGIUP15_PU50': 1,
            'RECOUP15_PU50': 1,
            'DIGIUP15_PU25': 1,
            'RECOUP15_PU25': 1,
            'DIGIHIMIX': 5,
            'RECOHIMIX': 5,
            'RECODSplit': 1,
            'SingleMuPt10_UP15_ID': 1,
            'DIGIUP15_ID': 1,
            'RECOUP15_ID': 1,
            'TTbar_13_ID': 1,
            'SingleMuPt10FS_ID': 1,
            'TTbarFS_ID': 1,
            'RECODR2_50nsreHLT': 1,
            'RECODR2_25nsreHLT': 1,
            'HLTDR2_50ns': 1,
            'HLTDR2_25ns': 1,
            'Hadronizer': 1,
            'REMINIAODPROD': 1,
            'REMINIAOD': 1,
            'REMINIAOD_PU50': 1,
            'REMINIAOD_PU25': 1,
            'REMINIAODDR2_50ns': 1,
            'REMINIAODDR2_25ns': 1,
        }

        acqEra = False
        for (n, directory) in directories.items():
            chainDict = copy.deepcopy(self.defaultChain)
            print("inspecting %s" % (directory,))
            nextHasDSInput = None
            for (x, s) in mReader.workFlowSteps.items():
                # x has the format (num, prefix)
                # s has the format (num, name, commands, stepList)
                if x[0] != n:
                    continue
                splitForThisWf = None
                thisLabel = self.speciallabel
                if any("HARVESTGEN" in step for step in s[3]):
                    chainDict['TimePerEvent'] = 0.01
                    thisLabel = thisLabel + "_gen"
                # for re-miniAOD test
                if any("REMINIAOD" in step for step in s[3]):
                    thisLabel = thisLabel + "_ReMiniAOD"
                processStrPrefix = ''
                setPrimaryDs = None
                for index, step in enumerate(s[3]):
                    command = s[2][index]
                    if 'INPUT' in step or (not isinstance(command, str)):
                        # Not a task of its own: remember the input
                        # description for the step that follows.
                        nextHasDSInput = command
                        continue

                    if index == 0:
                        # first step and not input -> gen part
                        task = copy.deepcopy(self.defaultScratch)
                        chainDict['nowmTasklist'].append(task)
                        stepIO = self._loadStepIO(directory, step)
                        if stepIO is None:
                            return -15
                        task['nowmIO'] = stepIO

                        task['PrimaryDataset'] = 'RelVal' + s[1].split('+')[0]
                        if '--relval' not in command:
                            print('Impossible to create task from scratch without splitting information with --relval')
                            return -12
                        ns = [int(v) for v in self._argAfter(command, '--relval').split(',')]
                        task['RequestNumEvents'] = ns[0]
                        task['EventsPerJob'] = ns[1]
                        if 'FASTSIM' in command or '--fast' in command:
                            thisLabel += '_FastSim'
                        if 'lhe' in command:
                            task['LheInputFiles'] = True

                    elif nextHasDSInput:
                        task = copy.deepcopy(self.defaultInput)
                        chainDict['nowmTasklist'].append(task)
                        stepIO = self._loadStepIO(directory, step)
                        if stepIO is None:
                            return -15
                        task['nowmIO'] = stepIO
                        task['InputDataset'] = nextHasDSInput.dataSet
                        splitForThisWf = nextHasDSInput.split
                        task['LumisPerJob'] = splitForThisWf
                        if step in wmsplit:
                            task['LumisPerJob'] = wmsplit[step]
                        # get the run numbers or #events
                        if len(nextHasDSInput.run):
                            task['RunWhitelist'] = nextHasDSInput.run
                        if len(nextHasDSInput.ls):
                            task['LumiList'] = nextHasDSInput.ls
                        if '--data' in command and nextHasDSInput.label:
                            thisLabel += '_RelVal_%s' % nextHasDSInput.label
                        if 'filter' in task['nowmIO']:
                            print("This has an input DS and a filter sequence: very likely to be the PyQuen sample")
                            processStrPrefix = 'PU_'
                            setPrimaryDs = 'RelVal' + s[1].split('+')[0]
                            if setPrimaryDs:
                                task['PrimaryDataset'] = setPrimaryDs
                        nextHasDSInput = None
                    else:
                        # not first step and no inputDS
                        task = copy.deepcopy(self.defaultTask)
                        chainDict['nowmTasklist'].append(task)
                        stepIO = self._loadStepIO(directory, step)
                        if stepIO is None:
                            return -15
                        task['nowmIO'] = stepIO
                        if splitForThisWf:
                            task['LumisPerJob'] = splitForThisWf
                        if step in wmsplit:
                            task['LumisPerJob'] = wmsplit[step]

                    # change LumisPerJob for Hadronizer steps.
                    if 'Hadronizer' in step:
                        task['LumisPerJob'] = wmsplit['Hadronizer']

                    task['TaskName'] = step
                    if setPrimaryDs:
                        task['PrimaryDataset'] = setPrimaryDs
                    task['ConfigCacheID'] = '%s/%s.py' % (directory, step)
                    task['GlobalTag'] = task['nowmIO']['GT']  # copy to the proper parameter name
                    chainDict['GlobalTag'] = task['nowmIO']['GT']  # set in general to the last one of the chain
                    if 'pileup' in task['nowmIO']:
                        task['MCPileup'] = task['nowmIO']['pileup']
                    # catch --pileup (scenario) and not --pileup_ (dataset to
                    # be mixed) => works also making PRE-MIXed dataset
                    if '--pileup ' in command:
                        processStrPrefix = 'PU_'  # take care of pu overlay done with GEN-SIM mixing
                        scenario = self._argAfter(command, '--pileup')
                        # Substring test: the former find(...) > 0 missed a
                        # match at position 0.
                        if '25ns' in scenario:
                            processStrPrefix = 'PU25ns_'
                        elif '50ns' in scenario:
                            processStrPrefix = 'PU50ns_'
                    if 'DIGIPREMIX_S2' in command:
                        # take care of pu overlay done with DIGI mixing of premixed events
                        premixInput = self._argAfter(command, '--pileup_input')
                        if '25ns' in premixInput:
                            processStrPrefix = 'PUpmx25ns_'
                        elif '50ns' in premixInput:
                            processStrPrefix = 'PUpmx50ns_'

                    processingString = processStrPrefix + task['GlobalTag'].replace('::All', '') + thisLabel
                    if acqEra:
                        chainDict['AcquisitionEra'][step] = chainDict['CMSSWVersion']
                        chainDict['ProcessingString'][step] = processingString
                    else:
                        task['AcquisitionEra'] = chainDict['CMSSWVersion']
                        task['ProcessingString'] = processingString

                # end of loop through steps
                chainDict['RequestString'] = 'RV' + chainDict['CMSSWVersion'] + s[1].split('+')[0]
                if processStrPrefix or thisLabel:
                    chainDict['RequestString'] += '_' + processStrPrefix + thisLabel

            # wrap up for this one: walk the tasks from last to first and
            # connect each one to the earlier task that lists its primary
            # input file among its outputs.
            taskList = chainDict['nowmTasklist']
            for i_second in reversed(range(len(taskList))):
                t_second = taskList[i_second]
                if 'primary' not in t_second['nowmIO']:
                    continue
                primary = t_second['nowmIO']['primary'][0].replace('file:', '')
                for i_input in reversed(range(0, i_second)):
                    t_input = taskList[i_input]
                    for (om, o) in t_input['nowmIO'].items():
                        if primary in o:
                            t_second['InputTask'] = t_input['TaskName']
                            t_second['InputFromOutputModule'] = om
                            if t_second['TaskName'].startswith('HARVEST'):
                                # the info are not in the task specific dict
                                # but in the general dict
                                chainDict.update(copy.deepcopy(self.defaultHarvest))
                                chainDict['DQMConfigCacheID'] = t_second['ConfigCacheID']
                            break

            # there is in fact only one acquisition era
            if acqEra:
                chainDict['AcquisitionEra'] = list(chainDict['AcquisitionEra'].values())[0]

            # clean things up now
            itask = 0
            if self.keep:
                # integer entries select tasks by position
                for i in self.keep:
                    if isinstance(i, int) and i < len(taskList):
                        taskList[i]['KeepOutput'] = True
            for t in taskList:
                if t['TaskName'].startswith('HARVEST'):
                    # harvesting is described at request level, not as a task
                    continue
                if not self.keep:
                    t['KeepOutput'] = True
                elif t['TaskName'] in self.keep:
                    t['KeepOutput'] = True
                t.pop('nowmIO')
                itask += 1
                chainDict['Task%d' % (itask)] = t

            # provide the number of tasks
            chainDict['TaskChain'] = itask

            chainDict.pop('nowmTasklist')
            self.chainDicts[n] = chainDict

        return 0

    def uploadConf(self, filePath, label, where):
        """Upload one configuration file to couch and return its cache id.

        In test mode nothing is uploaded and an incrementing counter value is
        returned. Otherwise results are cached per file base name in
        ``self.couchCache`` so the same name is uploaded only once.

        Raises:
            SystemExit: with code -16 when the wmcontrol modules cannot be
                imported (non-test mode only).
        """
        labelInCouch = self.label + '_' + label
        cacheName = filePath.split('/')[-1]
        if self.testMode:
            self.count += 1
            print('\tFake upload of %s to couch with label %s' % (filePath, labelInCouch))
            return self.count

        try:
            # Availability check only: the upload itself runs in a worker
            # process through upload_to_couch_oneArg.
            from modules.wma import upload_to_couch, DATABASE_NAME  # noqa: F401
        except ImportError:
            print('\n\tUnable to find wmcontrol modules. Please include it in your python path\n')
            print('\n\t QUIT\n')
            sys.exit(-16)

        if cacheName in self.couchCache:
            print("Not re-uploading %s to %s for %s" % (filePath, where, label))
            return self.couchCache[cacheName]

        print("Loading %s to %s for %s" % (filePath, where, label))
        # totally fork the upload to couch to prevent cross loading of
        # process configurations
        pool = multiprocessing.Pool(1)
        try:
            cacheIds = pool.map(upload_to_couch_oneArg,
                                [(filePath, labelInCouch, self.user, self.group, where)])
        finally:
            # release the worker process; it used to be left behind
            pool.close()
            pool.join()
        cacheId = cacheIds[0]
        self.couchCache[cacheName] = cacheId
        return cacheId

    def upload(self):
        """Upload every configuration referenced in ``self.chainDicts``.

        Each ``Task<N>`` entry's ``ConfigCacheID`` and the request-level
        ``DQMConfigCacheID`` are replaced by the id ``uploadConf`` returns.
        """
        for (n, d) in self.chainDicts.items():
            # iterate over a snapshot of the keys; values are replaced below
            for it in list(d):
                if it.startswith("Task") and it != 'TaskChain':
                    couchID = self.uploadConf(d[it]['ConfigCacheID'],
                                              str(n) + d[it]['TaskName'],
                                              d['CouchURL'])
                    print("%s  uploaded to couchDB for %s with ID %s" % (d[it]['ConfigCacheID'], str(n), couchID))
                    d[it]['ConfigCacheID'] = couchID
                if it == 'DQMConfigCacheID':
                    couchID = self.uploadConf(d['DQMConfigCacheID'],
                                              str(n) + 'harvesting',
                                              d['CouchURL'])
                    print("%s uploaded to couchDB for %s with ID %s" % (d['DQMConfigCacheID'], str(n), couchID))
                    d['DQMConfigCacheID'] = couchID

    def submit(self):
        """Print every request dictionary and, outside test mode, submit it.

        Raises:
            SystemExit: with code -17 when the wmcontrol modules cannot be
                imported (non-test mode only).
        """
        try:
            from modules.wma import makeRequest, approveRequest
            from wmcontrol import random_sleep
            print('\n\tFound wmcontrol\n')
        except ImportError:
            print('\n\tUnable to find wmcontrol modules. Please include it in your python path\n')
            if not self.testMode:
                print('\n\t QUIT\n')
                sys.exit(-17)

        import pprint
        for (n, d) in self.chainDicts.items():
            if self.testMode:
                print("Only viewing request %s" % (n,))
                # pprint writes to stdout itself; printing its return value
                # only added a stray "None" line.
                pprint.pprint(d)
            else:
                # submit to wmagent each dict
                print("For eyes before submitting %s" % (n,))
                pprint.pprint(d)
                print("Submitting %s ..........." % (n,))
                workFlow = makeRequest(self.wmagent, d, encodeDict=True)
                approveRequest(self.wmagent, workFlow)
                print("........... %s submitted" % (n,))
                random_sleep()
468 
469 
470 
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:7
list object
Definition: dbtoconf.py:77
def performInjectionOptionTest
def upload_to_couch_oneArg
double split
Definition: MVATrainer.cc:139