CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 import sys, os
2 
3 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
4 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
5 
6 # ================================================================================
7 
class MatrixException(Exception):
    """Error raised when a workflow definition in the matrix is inconsistent.

    The reader raises it when a single step provides both a ``cfg`` and an
    ``input`` entry.  The message is kept on ``self.msg`` and is also what
    ``str()`` returns.

    NOTE(review): the ``class`` line was missing from this copy of the file;
    the name is taken from the ``raise MatrixException(msg)`` call site and
    the ``Exception`` base is assumed -- confirm against the original.
    """

    def __init__(self, msg):
        # Hand the message to Exception as well so that ``args`` is populated
        # (needed for a meaningful repr and for pickling of the exception).
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Set up the reader from an options object.

    ``opt`` must expose the attributes ``what``, ``wmcontrol``,
    ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite`` and
    ``noRun``; each one is copied onto the instance under the name listed in
    the table below.
    """
    # Start from a clean state (containers plus the table of matrix files).
    self.reset(opt.what)

    # (attribute on self, attribute on opt), copied in this order.
    copiedOptions = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for ownName, optName in copiedOptions:
        setattr(self, ownName, getattr(opt, optName))
    return
31 
def reset(self, what='all'):
    """Put the reader back into its initial, empty state.

    ``what`` is stored on ``self.what``.  The workflow containers are
    emptied, the table of known matrix files is rebuilt and
    ``self.relvalModule`` is cleared.
    """
    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # One row per matrix file: (module name, prefix, part of the default matrix)
    knownFiles = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_unsch', 'unsch-', True),
        ('relval_premix', 'premix-', True),
        ('relval_miniaod', 'miniaod-', True),
    )

    self.filesPrefMap = dict((row[0], row[1]) for row in knownFiles)
    self.files = [row[0] for row in knownFiles]
    self.filesDefault = dict((row[0], row[2]) for row in knownFiles)

    self.relvalModule = None

    return
89 
def makeCmd(self, step):
    """Split a step dictionary into its cfg, its input and an option string.

    Returns the tuple ``(cfg, input, cmd)``:

    * ``cfg``   -- value of the key spelled ``cfg`` (any case), else None
    * ``input`` -- value of the key spelled ``input`` (any case), else None
    * ``cmd``   -- ``' <key> <value>'`` for every remaining key, concatenated
      in the iteration order of ``step``

    Keys containing ``no_exec`` are dropped: we want to really run it.
    """
    cfgValue = None
    inputValue = None
    optionParts = []
    for key, value in step.items():
        if 'no_exec' in key:
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            # returned separately, not part of the option string
            cfgValue = value
        elif lowered == 'input':
            # returned separately, not part of the option string
            inputValue = value
        else:
            optionParts.append(' ' + key + ' ' + str(value))
    return cfgValue, inputValue, ''.join(optionParts)
109 
def makeStep(self,step,overrides):
    """Return ``step`` with ``overrides`` applied.

    When ``overrides`` has no keys the very same ``step`` object is handed
    back; otherwise the result of ``merge([overrides, step])`` is returned.

    NOTE(review): ``merge`` is not bound anywhere in the visible text of this
    method (one source line just above the ``if`` appears to be missing) --
    confirm where it is imported from.
    """
    if len(overrides.keys()) == 0:
        return step
    return merge([overrides] + [step])
117 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and turn its workflows into commands.

    The module ``Configuration.PyReleaseValidation.<fileNameIn>`` is imported
    and kept on ``self.relvalModule``.  For every workflow it defines, an
    entry ``(num, name, commands, ranStepList)`` is stored in
    ``self.workFlowSteps`` under the key ``(num, prefix)``, where ``prefix``
    comes from ``self.filesPrefMap``.

    Parameters:
      fileNameIn  -- name of the matrix module, a key of ``self.filesPrefMap``
      useInput    -- iterable of ``'<wf>'`` or ``'<wf>:<level>'`` strings
                     (``<wf>`` may be ``'all'``) naming workflows whose
                     leading steps may be replaced by a ``...INPUT`` step, up
                     to step index ``<level>`` (0 when omitted)
      refRel      -- one release, or a comma separated list with one release
                     per entry of the module's ``baseDataSetRelease``, passed
                     on to the module's ``changeRefRelease``
      fromScratch -- workflow numbers to take out of the ``useInput`` request

    Returns None.  Returns early, after printing a message, when the module
    cannot be imported or when the number of releases in ``refRel`` does not
    match ``baseDataSetRelease``.

    Raises MatrixException when a step provides both a cfg and an input.

    NOTE(review): the nesting below was rebuilt from a copy of the file whose
    indentation had been flattened -- confirm against the original.
    NOTE(review): ``merge`` is not bound in the visible text of this method
    (one source line next to its call appears to be missing) -- confirm
    where it is imported from.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing %s" % (fileNameIn,))

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
        return

    if useInput is not None:
        print("request for INPUT for  %s" % (useInput,))

    # workflow number -> highest step index that may be swapped for an INPUT step
    fromInput = {}

    if useInput:
        for request in useInput:
            if ':' in request:
                (target, level) = request.split(':')
                level = int(level)
            else:
                target, level = request, 0
            if target == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(target)] = level

    if fromScratch:
        for num in fromScratch:
            fromInput.pop(float(num), None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # Previously a silent return; say why this file is skipped.
                print("ERROR: got %d reference releases but %d are needed for %s"
                      % (len(refRels), len(baseReleases), fileNameIn))
                return
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        self.relvalModule.changeRefRelease(self.relvalModule.steps, releasePairs)

    steps = self.relvalModule.steps

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # This is the module's own list; the INPUT handling below edits it in place.
        stepList = wfInfo[1]
        stepOverrides = wfInfo.overrides
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
            if len(wfInfo) >= 4:
                addTo = wfInfo[3]
                # pad with 0; '<' (not '!=') so that an over-long list cannot
                # make this loop run forever
                while len(addTo) < len(stepList):
                    addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: walk back from the requested
        # level and use the first '<step0>__..__<stepI>INPUT' step that exists
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                if testName in steps.keys():
                    stepList[stepI] = testName
                    # drop the steps that the INPUT step replaces
                    del stepList[:stepI]
                    break

        for stepName in stepList:
            # cannot put a certain number of things in wm
            if self.wm and stepName in ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT'):
                continue

            if len(name) > 0:
                name += '+'
            # any step can be mirrored with INPUT (resolved above);
            # maybe we want two-level-deep input
            name += stepName

            stepDict = self.makeStep(steps[stepName], stepOverrides)
            if addCom and (not addTo or addTo[stepIndex] == 1):
                stepDict = merge(addCom + [stepDict])
            cfg, inputInfo, opts = self.makeCmd(stepDict)

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input description itself takes the place of a command
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py '+cfg+' '+opts
                else:
                    cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py'%(stepName,stepName)
                if self.addCommand:
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd += ' '+self.addCommand
                    else:
                        cmd += ' '+self.addCommand
                if self.wm and self.revertDqmio=='yes':
                    cmd = cmd.replace('DQMIO','DQM')
                    cmd = cmd.replace('--filetype DQM','')
            commands.append(cmd)
            ranStepList.append(stepName)
            # counts only the steps that were kept
            stepIndex += 1

        self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)

    return
292 
293 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write one ``cmsDriver_<file>_hlt.txt`` summary per matrix file.

    For every file in ``self.files`` the reader is reset with ``what``, the
    file is read with ``readMatrix`` and, when it produced workflows, a text
    file is written to the current directory: first one line per workflow,
    then (unless ``step1Only``) the commands of the later steps as
    ``STEP<n> ++ <step> @@@ <command>`` lines.

    Parameters:
      useInput, refRel, fromScratch -- passed on to ``readMatrix``
      what       -- passed to ``reset``; ``'all'`` or a substring selecting
                    the matrix files to process
      step1Only  -- only describe the first step; workflows whose first
                    command is not a string are left out
      selected   -- workflow numbers to keep (all when empty or None)

    Any exception from ``readMatrix`` is reported and re-raised.

    NOTE(review): the nesting below was rebuilt from a copy of the file whose
    indentation had been flattened -- confirm against the original.
    """
    if selected:
        selected = [float(s) for s in selected]

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        if self.what == 'all' and not self.filesDefault[matrixFile]:
            print("ignoring file not used by default (enable with -w) %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
        writtenWF = 0

        # 'with' closes the file on every path, including the step1Only
        # 'continue' below and any exception raised while writing.
        with open(dataFileName, 'w') as outFile:

            print("found  %s  workflows for  %s" % (len(self.workFlowSteps.keys()), dataFileName))
            # indexAndSteps[i] collects the (step name, command) pairs of step i+1
            indexAndSteps = []

            for key in sorted(self.workFlowSteps.keys()):
                if selected and not (key[0] in selected):
                    continue
                # trick to skip the HImix IB test
                if key[0] in (203.1, 204.1, 205.1, 4.51, 4.52):
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]

                wfName, stepNames = name.split('+',1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO','')
                stepNames = stepNames.replace('+SKIMCOSD','')
                stepNames = stepNames.replace('+SKIMD','')
                # Look for '+HARVEST' itself: a HARVEST that only shows up in
                # the first step name has no leading '+' and used to make
                # index() raise ValueError.
                if '+HARVEST' in stepNames:
                    # find out automatically what to remove
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.find('+', exactb+1)
                    if exacte < 0:
                        exacte = len(stepNames)
                    stepNames = stepNames.replace(stepNames[exactb:exacte],'')
                otherSteps = None
                if '+' in stepNames:
                    step1, otherSteps = stepNames.split('+',1)

                line = str(num) + ' ++ '+ wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+',',')
                else:
                    line += ' ++ none'
                # a first command that is not a string is an input description
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i+2:
                            indexAndSteps.append(set())
                        indexAndSteps[i+1].add((c, commands[i+1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: '+inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:'+'|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: '+str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:'+inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ '+commands[0]
                if self.revertDqmio=='yes':
                    line = line.replace('DQMIO','DQM')
                writtenWF += 1
                outFile.write(line+'\n')

            outFile.write('\n'+'\n')
            if step1Only:
                continue

            for (index, pairs) in enumerate(indexAndSteps):
                stepIndex = index+1
                for (stepName, cmd) in pairs:
                    if 'dasquery.log' in cmd:
                        continue
                    line = 'STEP%d ++ '%(stepIndex,) + stepName + ' @@@ '+cmd
                    if self.revertDqmio=='yes':
                        line = line.replace('DQMIO','DQM')
                    outFile.write(line+'\n')
                outFile.write('\n'+'\n')

        print("wrote  %s  workflow%s  to  %s"
              % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
    return
397 
def workFlowsByLocation(self, cafVeto=True):
    """Return the workflows of ``self.workFlows`` that can be used here.

    A workflow is left out only when ``cafVeto`` is true, its first command
    is an ``InputInfo`` whose ``location`` is ``'CAF'``, and the
    ``CMS_PATH`` environment variable does not contain ``cms/caf/cms``.
    The relative order of the remaining workflows is kept.
    """
    # Check if we are on CAF.  A missing CMS_PATH counts as "not on CAF"
    # instead of raising KeyError.
    onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

    workflows = []
    for workflow in self.workFlows:
        firstCmd = workflow.cmds[0]
        vetoed = (isinstance(firstCmd, InputInfo)
                  and cafVeto
                  and firstCmd.location == 'CAF'
                  and not onCAF)
        if not vetoed:
            workflows.append(workflow)

    return workflows
412 
def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
    """Print the workflows returned by ``workFlowsByLocation(cafVeto)``.

    Parameters:
      selected -- workflow numbers to list; everything is listed when empty
                  or None.  With a selection the commands are printed in
                  full, otherwise they are cut to 100 characters.
      extended -- print every command of a workflow; when false only one
                  line with number and step names is printed per workflow
      cafVeto  -- passed on to ``workFlowsByLocation``

    A count of workflows per number of steps is printed at the end.
    """
    if selected:
        selected = [float(s) for s in selected]
    wfs = self.workFlowsByLocation(cafVeto)

    print("\nfound a total of  %s  workflows:" % (len(wfs),))
    if selected:
        print(" of which the following %s were selected:" % (len(selected),))
        # -ap for now:
        maxLen = -1  # for individual listing, no limit on width
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"
    else:
        maxLen = 100  # for summary, limit width of output
        fmt1 = "%-6s %-35s [1]: %s ..."
        fmt2 = " %35s [%d]: %s ..."

    # N[k] is the number of listed workflows that have k+1 commands
    N = []
    for wf in wfs:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        # pad with zeros
        N.extend([0] * (len(wf.cmds) - len(N)))
        N[len(wf.cmds)-1] += 1
        wfName, stepNames = wf.nameId.split('+',1)
        if not extended:
            # one summary line, and only for workflows that have commands
            if len(wf.cmds) > 0:
                print("%-6s %-35s "% (wf.numId, stepNames))
            continue
        for position, command in enumerate(wf.cmds):
            shown = (str(command)+' ')[:maxLen]
            if position == 0:
                print(fmt1 % (wf.numId, stepNames, shown))
            else:
                print(fmt2 % (' ', position+1, shown))
    print('')
    for i, n in enumerate(N):
        if n:
            print("%s workflows with %s steps" % (n, i+1))

    return
449 
def createWorkFlows(self, fileNameIn):
    """Create the WorkFlow objects for one matrix file.

    Only the entries of ``self.workFlowSteps`` whose prefix is the one
    registered for ``fileNameIn`` in ``self.filesPrefMap`` are used, in
    ascending order of workflow number.  Each ``<num>_<name>`` seen for the
    first time is recorded in ``self.nameList``; a repeated one is reported
    on stdout.  A ``WorkFlow`` is appended to ``self.workFlows`` for every
    entry.

    NOTE(review): the indentation of this copy of the file was flattened;
    the append is taken to sit after the if/else (so duplicates are appended
    too) -- confirm against the original.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    wanted = [wfNum for (wfNum, pref) in self.workFlowSteps.keys() if pref == prefixIn]
    wanted.sort()

    for wfNum in wanted:
        val = self.workFlowSteps[(wfNum, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num)+'_'+name
        if nameId not in self.nameList:
            self.nameList[nameId] = val
        else:
            print("==> duplicate name found for  %s" % (nameId,))
            print(" keeping :  %s" % (self.nameList[nameId],))
            print(" ignoring :  %s" % (val,))

        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
476 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    With ``self.what == 'all'`` the files flagged in ``self.filesDefault``
    are processed; otherwise those whose name contains ``self.what``.  For
    each of them ``readMatrix`` and then ``createWorkFlows`` is called.
    ``useInput``, ``refRel`` and ``fromScratch`` are passed on to
    ``readMatrix``.  An exception from either call is reported on stdout
    and re-raised.
    """
    for matrixFile in self.files:
        if self.what == 'all':
            if not self.filesDefault[matrixFile]:
                print("ignoring %s from default matrix" % (matrixFile,))
                continue
        elif self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % (str(e),))
            raise
498 
499 
def show(self, selected=None, extended=True, cafVeto=True):
    """Print the workflow listing followed by a separator of 80 dashes.

    All three arguments are passed on unchanged to ``showWorkFlows``.
    """
    self.showWorkFlows(selected, extended, cafVeto)
    # blank line, 80 dashes followed by a space, blank line
    separator = '\n' + '-'*80 + ' ' + '\n'
    print(separator)
504 
505 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory.

    An existing file of that name is overwritten.
    """
    import pickle

    # Binary mode is what pickle expects, and the 'with' block makes sure
    # the file is flushed and closed before returning.
    with open('theMatrix.pkl', 'wb') as dbFile:
        pickle.dump(self.workFlows, dbFile)

    return
512 
Definition: merge.py:1
revertDqmio
maybe we want two-level-deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77