MatrixInjector.py
import sys
import json
import os
import copy
import multiprocessing

def performInjectionOptionTest(opt):
    if opt.show:
        print 'Not injecting to wmagent in --show mode. Need to run the workflows.'
        sys.exit(-1)
    if opt.wmcontrol=='init':
        #init means it'll be in test mode
        opt.nThreads=0
    if opt.wmcontrol=='test':
        #means the wf were created already, and we just dryRun it.
        opt.dryRun=True
    if opt.wmcontrol=='submit' and opt.nThreads==0:
        print 'Not injecting to wmagent in -j 0 mode. Need to run the workflows.'
        sys.exit(-1)
    if opt.wmcontrol=='force':
        print "This is an expert setting, you'd better know what you're doing"
        opt.dryRun=True

def upload_to_couch_oneArg(arguments):
    from modules.wma import upload_to_couch
    (filePath,labelInCouch,user,group,where) = arguments
    cacheId=upload_to_couch(filePath,
                            labelInCouch,
                            user,
                            group,
                            test_mode=False,
                            url=where)
    return cacheId
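# Note: this single-argument wrapper exists so that uploadConf() below can run
# the couch upload through multiprocessing.Pool.map, which passes the whole
# (filePath, labelInCouch, user, group, where) tuple as one argument.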


class MatrixInjector(object):

    def __init__(self,opt,mode='init',options=''):
        self.count=1040

        self.dqmgui=None
        self.wmagent=None
        for k in options.split(','):
            if k.startswith('dqm:'):
                self.dqmgui=k.split(':',1)[-1]
            elif k.startswith('wma:'):
                self.wmagent=k.split(':',1)[-1]

        self.testMode=((mode!='submit') and (mode!='force'))
        self.version = 1
        self.keep = opt.keep

        #wmagent stuff
        if not self.wmagent:
            self.wmagent=os.getenv('WMAGENT_REQMGR')
        if not self.wmagent:
            self.wmagent = 'cmsweb.cern.ch'

        if not self.dqmgui:
            self.dqmgui="https://cmsweb.cern.ch/dqm/relval"
        #couch stuff
        self.couch = 'https://'+self.wmagent+'/couchdb'
#        self.couchDB = 'reqmgr_config_cache'
        self.couchCache={} # so that we do not upload like crazy, and recycle cfgs
        self.user = os.getenv('USER')
        self.group = 'ppd'
        self.label = 'RelValSet_'+os.getenv('CMSSW_VERSION').replace('-','')+'_v'+str(self.version)
        self.speciallabel=''
        if opt.label:
            self.speciallabel= '_'+opt.label


        if not os.getenv('WMCORE_ROOT'):
            print '\n\twmclient is not set up properly. Will not be able to upload or submit requests.\n'
            if not self.testMode:
                print '\n\t QUIT\n'
                sys.exit(-18)
        else:
            print '\n\tFound wmclient\n'

        self.defaultChain={
            "RequestType" : "TaskChain",                    #this is how we handle relvals
            "Requestor": self.user,                         #Person responsible
            "Group": self.group,                            #group for the request
            "CMSSWVersion": os.getenv('CMSSW_VERSION'),     #CMSSW Version (used for all tasks in chain)
            "Campaign": os.getenv('CMSSW_VERSION'),         #only for wmstat purpose
            "ScramArch": os.getenv('SCRAM_ARCH'),           #Scram Arch (used for all tasks in chain)
            "ProcessingVersion": self.version,              #Processing Version (used for all tasks in chain)
            "GlobalTag": None,                              #Global Tag (overridden per task)
            "CouchURL": self.couch,                         #URL of CouchDB containing Config Cache
            "ConfigCacheURL": self.couch,                   #URL of CouchDB containing Config Cache
            "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/global/DBSReader",
            #- Will contain all configs for all Tasks
            #"SiteWhitelist" : ["T2_CH_CERN", "T1_US_FNAL"], #Site whitelist
            "TaskChain" : None,                             #Define number of tasks in chain.
            "nowmTasklist" : [],                            #a list of tasks as we put them in
            "unmergedLFNBase" : "/store/unmerged",
            "mergedLFNBase" : "/store/relval",
            "dashboardActivity" : "relval",
            "Memory" : 2400,
            "SizePerEvent" : 1234,
            "TimePerEvent" : 20
            }

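        # Note: defaultChain is the skeleton of the TaskChain request dictionary:
        # prepare() deep-copies it once per workflow, fills in GlobalTag and
        # RequestString and appends the per-step Task entries, and upload()/submit()
        # then push the configurations and the request itself to the agent.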
        self.defaultHarvest={
            "EnableHarvesting" : "True",
            "DQMUploadUrl" : self.dqmgui,
            "DQMConfigCacheID" : None
            }

        self.defaultScratch={
            "TaskName" : None,                            #Task Name
            "ConfigCacheID" : None,                       #Generator Config id
            "GlobalTag": None,
            "SplittingAlgo" : "EventBased",               #Splitting Algorithm
            "EventsPerJob" : None,                        #Size of jobs in terms of splitting algorithm
            "RequestNumEvents" : None,                    #Total number of events to generate
            "Seeding" : "AutomaticSeeding",               #Random seeding method
            "PrimaryDataset" : None,                      #Primary Dataset to be created
            "nowmIO": {},
            "KeepOutput" : False
            }
        self.defaultInput={
            "TaskName" : "DigiHLT",                       #Task Name
            "ConfigCacheID" : None,                       #Processing Config id
            "GlobalTag": None,
            "InputDataset" : None,                        #Input Dataset to be processed
            "SplittingAlgo" : "LumiBased",                #Splitting Algorithm
            "LumisPerJob" : 10,                           #Size of jobs in terms of splitting algorithm
            "nowmIO": {},
            "KeepOutput" : False
            }
        self.defaultTask={
            "TaskName" : None,                            #Task Name
            "InputTask" : None,                           #Input Task Name (Task Name field of a previous Task entry)
            "InputFromOutputModule" : None,               #OutputModule name in the input task that will provide files to process
            "ConfigCacheID" : None,                       #Processing Config id
            "GlobalTag": None,
            "SplittingAlgo" : "LumiBased",                #Splitting Algorithm
            "LumisPerJob" : 10,                           #Size of jobs in terms of splitting algorithm
            "nowmIO": {},
            "KeepOutput" : False
            }
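        # Note: the three task templates above (defaultScratch, defaultInput and
        # defaultTask) are deep-copied by prepare() for every step of a workflow:
        # defaultScratch for a first step generating events from scratch,
        # defaultInput for a step reading an existing input dataset, and
        # defaultTask for any later step chained to a previous task's output.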

        self.chainDicts={}

    def prepare(self,mReader, directories, mode='init'):
        try:
            #from Configuration.PyReleaseValidation.relval_steps import wmsplit
            wmsplit = {}
            wmsplit['DIGIHI']=5
            wmsplit['RECOHI']=5
            wmsplit['HLTD']=5
            wmsplit['RECODreHLT']=2
            wmsplit['DIGIPU']=4
            wmsplit['DIGIPU1']=4
            wmsplit['RECOPU1']=1
            wmsplit['DIGIUP15_PU50']=1
            wmsplit['RECOUP15_PU50']=1
            wmsplit['DIGIUP15_PU25']=1
            wmsplit['RECOUP15_PU25']=1
            wmsplit['DIGIHISt3']=5
            wmsplit['RECODSplit']=1
            wmsplit['SingleMuPt10_ID']=1
            wmsplit['DIGI_ID']=1
            wmsplit['RECO_ID']=1
            wmsplit['TTbar_ID']=1
            wmsplit['SingleMuPt10FS_ID']=1
            wmsplit['TTbarFS_ID']=1

            #import pprint
            #pprint.pprint(wmsplit)
        except:
            print "Not set up for step splitting"
            wmsplit={}

        acqEra=False
        for (n,dir) in directories.items():
            chainDict=copy.deepcopy(self.defaultChain)
            print "inspecting",dir
            nextHasDSInput=None
            for (x,s) in mReader.workFlowSteps.items():
                #x has the format (num, prefix)
                #s has the format (num, name, commands, stepList)
                if x[0]==n:
                    #print "found",n,s[3]
                    #chainDict['RequestString']='RV'+chainDict['CMSSWVersion']+s[1].split('+')[0]
                    index=0
                    splitForThisWf=None
                    thisLabel=self.speciallabel
                    processStrPrefix=''
                    setPrimaryDs=None
                    for step in s[3]:

                        if 'INPUT' in step or (not isinstance(s[2][index],str)):
                            nextHasDSInput=s[2][index]

                        else:

                            if (index==0):
                                #first step and not an input -> gen part
                                chainDict['nowmTasklist'].append(copy.deepcopy(self.defaultScratch))
                                try:
                                    chainDict['nowmTasklist'][-1]['nowmIO']=json.loads(open('%s/%s.io'%(dir,step)).read())
                                except:
                                    print "Failed to find",'%s/%s.io'%(dir,step),". The workflows were probably not run, or the cfg was not created."
                                    return -15

                                chainDict['nowmTasklist'][-1]['PrimaryDataset']='RelVal'+s[1].split('+')[0]
                                if not '--relval' in s[2][index]:
                                    print 'Impossible to create a task from scratch without the splitting information given via --relval'
                                    return -12
                                else:
                                    arg=s[2][index].split()
                                    ns=map(int,arg[arg.index('--relval')+1].split(','))
                                    chainDict['nowmTasklist'][-1]['RequestNumEvents'] = ns[0]
                                    chainDict['nowmTasklist'][-1]['EventsPerJob'] = ns[1]
                                if 'FASTSIM' in s[2][index] or '--fast' in s[2][index]:
                                    thisLabel+='_FastSim'

                            elif nextHasDSInput:
                                chainDict['nowmTasklist'].append(copy.deepcopy(self.defaultInput))
                                try:
                                    chainDict['nowmTasklist'][-1]['nowmIO']=json.loads(open('%s/%s.io'%(dir,step)).read())
                                except:
                                    print "Failed to find",'%s/%s.io'%(dir,step),". The workflows were probably not run, or the cfg was not created."
                                    return -15
                                chainDict['nowmTasklist'][-1]['InputDataset']=nextHasDSInput.dataSet
                                splitForThisWf=nextHasDSInput.split
                                chainDict['nowmTasklist'][-1]['LumisPerJob']=splitForThisWf
                                if step in wmsplit:
                                    chainDict['nowmTasklist'][-1]['LumisPerJob']=wmsplit[step]
                                # get the run numbers or #events
                                if len(nextHasDSInput.run):
                                    chainDict['nowmTasklist'][-1]['RunWhitelist']=nextHasDSInput.run
                                #print "what is s",s[2][index]
                                if '--data' in s[2][index] and nextHasDSInput.label:
                                    thisLabel+='_RelVal_%s'%nextHasDSInput.label
                                if 'filter' in chainDict['nowmTasklist'][-1]['nowmIO']:
                                    print "This has an input DS and a filter sequence: very likely to be the PyQuen sample"
                                    processStrPrefix='PU_'
                                    setPrimaryDs = 'RelVal'+s[1].split('+')[0]
                                    if setPrimaryDs:
                                        chainDict['nowmTasklist'][-1]['PrimaryDataset']=setPrimaryDs
                                nextHasDSInput=None
                            else:
                                #not the first step and no input DS
                                chainDict['nowmTasklist'].append(copy.deepcopy(self.defaultTask))
                                try:
                                    chainDict['nowmTasklist'][-1]['nowmIO']=json.loads(open('%s/%s.io'%(dir,step)).read())
                                except:
                                    print "Failed to find",'%s/%s.io'%(dir,step),". The workflows were probably not run, or the cfg was not created."
                                    return -15
                                if splitForThisWf:
                                    chainDict['nowmTasklist'][-1]['LumisPerJob']=splitForThisWf
                                if step in wmsplit:
                                    chainDict['nowmTasklist'][-1]['LumisPerJob']=wmsplit[step]

                            #print step
                            chainDict['nowmTasklist'][-1]['TaskName']=step
                            if setPrimaryDs:
                                chainDict['nowmTasklist'][-1]['PrimaryDataset']=setPrimaryDs
                            chainDict['nowmTasklist'][-1]['ConfigCacheID']='%s/%s.py'%(dir,step)
                            chainDict['nowmTasklist'][-1]['GlobalTag']=chainDict['nowmTasklist'][-1]['nowmIO']['GT'] # copy to the proper parameter name
                            chainDict['GlobalTag']=chainDict['nowmTasklist'][-1]['nowmIO']['GT'] # set in general to the last one of the chain
                            if 'pileup' in chainDict['nowmTasklist'][-1]['nowmIO']:
                                chainDict['nowmTasklist'][-1]['MCPileup']=chainDict['nowmTasklist'][-1]['nowmIO']['pileup']
                            if '--pileup ' in s[2][index]: # catch --pileup (scenario) and not --pileup_ (dataset to be mixed) => also works when making PRE-MIXed datasets
                                processStrPrefix='PU_' # take care of pu overlay done with GEN-SIM mixing
                                if ( s[2][index].split()[ s[2][index].split().index('--pileup')+1 ] ).find('25ns') > 0 :
                                    processStrPrefix='PU25ns_'
                                elif ( s[2][index].split()[ s[2][index].split().index('--pileup')+1 ] ).find('50ns') > 0 :
                                    processStrPrefix='PU50ns_'
                            if 'DIGIPREMIX_S2' in s[2][index] : # take care of pu overlay done with DIGI mixing of premixed events
                                if s[2][index].split()[ s[2][index].split().index('--pileup_input')+1 ].find('25ns') > 0 :
                                    processStrPrefix='PUpmx25ns_'
                                elif s[2][index].split()[ s[2][index].split().index('--pileup_input')+1 ].find('50ns') > 0 :
                                    processStrPrefix='PUpmx50ns_'

                            if acqEra:
                                #chainDict['AcquisitionEra'][step]=(chainDict['CMSSWVersion']+'-PU_'+chainDict['nowmTasklist'][-1]['GlobalTag']).replace('::All','')+thisLabel
                                chainDict['AcquisitionEra'][step]=chainDict['CMSSWVersion']
                                chainDict['ProcessingString'][step]=processStrPrefix+chainDict['nowmTasklist'][-1]['GlobalTag'].replace('::All','')+thisLabel
                            else:
                                #chainDict['nowmTasklist'][-1]['AcquisitionEra']=(chainDict['CMSSWVersion']+'-PU_'+chainDict['nowmTasklist'][-1]['GlobalTag']).replace('::All','')+thisLabel
                                chainDict['nowmTasklist'][-1]['AcquisitionEra']=chainDict['CMSSWVersion']
                                chainDict['nowmTasklist'][-1]['ProcessingString']=processStrPrefix+chainDict['nowmTasklist'][-1]['GlobalTag'].replace('::All','')+thisLabel

                        index+=1
                    #end of loop through steps
                    chainDict['RequestString']='RV'+chainDict['CMSSWVersion']+s[1].split('+')[0]
                    if processStrPrefix or thisLabel:
                        chainDict['RequestString']+='_'+processStrPrefix+thisLabel



            #wrap up for this one
            import pprint
            #print 'wrapping up'
            #pprint.pprint(chainDict)
            #loop on the task list
            for i_second in reversed(range(len(chainDict['nowmTasklist']))):
                t_second=chainDict['nowmTasklist'][i_second]
                #print "t_second taskname", t_second['TaskName']
                if 'primary' in t_second['nowmIO']:
                    #print t_second['nowmIO']['primary']
                    primary=t_second['nowmIO']['primary'][0].replace('file:','')
                    for i_input in reversed(range(0,i_second)):
                        t_input=chainDict['nowmTasklist'][i_input]
                        for (om,o) in t_input['nowmIO'].items():
                            if primary in o:
                                #print "found",primary,"produced by",om,"of",t_input['TaskName']
                                t_second['InputTask'] = t_input['TaskName']
                                t_second['InputFromOutputModule'] = om
                                #print 't_second',pprint.pformat(t_second)
                                if t_second['TaskName'].startswith('HARVEST'):
                                    chainDict.update(copy.deepcopy(self.defaultHarvest))
                                    chainDict['DQMConfigCacheID']=t_second['ConfigCacheID']
                                    ## the info is not in the task-specific dict but in the general dict
                                    #t_input.update(copy.deepcopy(self.defaultHarvest))
                                    #t_input['DQMConfigCacheID']=t_second['ConfigCacheID']
                                break

            ## there is in fact only one acquisition era
            #if len(set(chainDict['AcquisitionEra'].values()))==1:
            #    print "setting only one acq"
            if acqEra:
                chainDict['AcquisitionEra'] = chainDict['AcquisitionEra'].values()[0]

            ## clean things up now
            itask=0
            if self.keep:
                for i in self.keep:
                    if type(i)==int and i < len(chainDict['nowmTasklist']):
                        chainDict['nowmTasklist'][i]['KeepOutput']=True
            for (i,t) in enumerate(chainDict['nowmTasklist']):
                if t['TaskName'].startswith('HARVEST'):
                    continue
                if not self.keep:
                    t['KeepOutput']=True
                elif t['TaskName'] in self.keep:
                    t['KeepOutput']=True
                t.pop('nowmIO')
                itask+=1
                chainDict['Task%d'%(itask)]=t

            ##


            ## provide the number of tasks
            chainDict['TaskChain']=itask #len(chainDict['nowmTasklist'])

            chainDict.pop('nowmTasklist')
            self.chainDicts[n]=chainDict


        return 0

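    # Note: after prepare() returns 0, self.chainDicts maps each workflow number
    # to a request dictionary whose tasks are stored under Task1..TaskN together
    # with TaskChain=N. upload() below replaces every ConfigCacheID with the
    # couch document id it gets back, and submit() sends each dictionary to the
    # wmagent (or just pretty-prints it in test mode).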
    def uploadConf(self,filePath,label,where):
        labelInCouch=self.label+'_'+label
        cacheName=filePath.split('/')[-1]
        if self.testMode:
            self.count+=1
            print '\tFake upload of',filePath,'to couch with label',labelInCouch
            return self.count
        else:
            try:
                from modules.wma import upload_to_couch,DATABASE_NAME
            except:
                print '\n\tUnable to find wmcontrol modules. Please include it in your python path\n'
                print '\n\t QUIT\n'
                sys.exit(-16)

            if cacheName in self.couchCache:
                print "Not re-uploading",filePath,"to",where,"for",label
                cacheId=self.couchCache[cacheName]
            else:
                print "Loading",filePath,"to",where,"for",label
                ## totally fork the upload to couch to prevent cross-loading of process configurations
                pool = multiprocessing.Pool(1)
                cacheIds = pool.map( upload_to_couch_oneArg, [(filePath,labelInCouch,self.user,self.group,where)] )
                cacheId = cacheIds[0]
            self.couchCache[cacheName]=cacheId
            return cacheId

    def upload(self):
        for (n,d) in self.chainDicts.items():
            for it in d:
                if it.startswith("Task") and it!='TaskChain':
                    #upload
                    couchID=self.uploadConf(d[it]['ConfigCacheID'],
                                            str(n)+d[it]['TaskName'],
                                            d['CouchURL']
                                            )
                    print d[it]['ConfigCacheID']," uploaded to couchDB for",str(n),"with ID",couchID
                    d[it]['ConfigCacheID']=couchID
                if it =='DQMConfigCacheID':
                    couchID=self.uploadConf(d['DQMConfigCacheID'],
                                            str(n)+'harvesting',
                                            d['CouchURL']
                                            )
                    print d['DQMConfigCacheID'],"uploaded to couchDB for",str(n),"with ID",couchID
                    d['DQMConfigCacheID']=couchID


    def submit(self):
        try:
            from modules.wma import makeRequest,approveRequest
            from wmcontrol import random_sleep
            print '\n\tFound wmcontrol\n'
        except:
            print '\n\tUnable to find wmcontrol modules. Please include it in your python path\n'
            if not self.testMode:
                print '\n\t QUIT\n'
                sys.exit(-17)

        import pprint
        for (n,d) in self.chainDicts.items():
            if self.testMode:
                print "Only viewing request",n
                pprint.pprint(d)
            else:
                #submit each dict to the wmagent
                print "For review before submitting",n
                pprint.pprint(d)
                print "Submitting",n,"..........."
                workFlow=makeRequest(self.wmagent,d,encodeDict=True)
                approveRequest(self.wmagent,workFlow)
                print "...........",n,"submitted"
                random_sleep()


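# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It relies only
# on the signatures defined above; the option object 'opt' and the matrix
# reader 'mReader' with its 'directories' map are assumed to be provided by
# the calling script (e.g. a runTheMatrix-style driver).
#
#   performInjectionOptionTest(opt)                  # sanity-check the injection options
#   injector = MatrixInjector(opt, mode=opt.wmcontrol)
#   if injector.prepare(mReader, directories) == 0:  # build one request dict per workflow
#       injector.upload()                            # upload the config cache documents
#       injector.submit()                            # make and approve the requests (dry-run in test mode)
# ----------------------------------------------------------------------------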
435 