CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
9  def __init__(self, msg):
10  self.msg = msg
11  def __str__(self):
12  return self.msg
13 
14 # ================================================================================
15 
17 
18  def __init__(self, opt):
19 
20  self.reset(opt.what)
21 
22  self.wm=opt.wmcontrol
23  self.revertDqmio=opt.revertDqmio
24  self.addCommand=opt.command
25  self.apply=opt.apply
26  self.commandLineWf=opt.workflow
27  self.overWrite=opt.overWrite
28 
29  self.noRun = opt.noRun
30  return
31 
32  def reset(self, what='all'):
33 
34  self.what = what
35 
36  #a bunch of information, but not yet the WorkFlow object
37  self.workFlowSteps = {}
38  #the actual WorkFlow objects
39  self.workFlows = []
40  self.nameList = {}
41 
42  self.filesPrefMap = {'relval_standard' : 'std-' ,
43  'relval_highstats': 'hi-' ,
44  'relval_pileup': 'PU-' ,
45  'relval_generator': 'gen-',
46  'relval_extendedgen': 'genExt-',
47  'relval_production': 'prod-' ,
48  'relval_ged': 'ged-',
49  'relval_upgrade':'upg-',
50  'relval_identity':'id-',
51  'relval_machine': 'mach-',
52  'relval_unsch': 'unsch-',
53  'relval_premix': 'premix-'
54  }
55 
56  self.files = ['relval_standard' ,
57  'relval_highstats',
58  'relval_pileup',
59  'relval_generator',
60  'relval_extendedgen',
61  'relval_production',
62  'relval_ged',
63  'relval_upgrade',
64  'relval_identity',
65  'relval_machine',
66  'relval_unsch',
67  'relval_premix'
68  ]
69  self.filesDefault = {'relval_standard':True ,
70  'relval_highstats':True ,
71  'relval_pileup':True,
72  'relval_generator':True,
73  'relval_extendedgen':True,
74  'relval_production':True,
75  'relval_ged':True,
76  'relval_upgrade':False,
77  'relval_identity':False,
78  'relval_machine':True,
79  'relval_unsch':True,
80  'relval_premix':True
81  }
82 
83  self.relvalModule = None
84 
85  return
86 
87  def makeCmd(self, step):
88 
89  cmd = ''
90  cfg = None
91  input = None
92  for k,v in step.items():
93  if 'no_exec' in k : continue # we want to really run it ...
94  if k.lower() == 'cfg':
95  cfg = v
96  continue # do not append to cmd, return separately
97  if k.lower() == 'input':
98  input = v
99  continue # do not append to cmd, return separately
100 
101  #chain the configs
102  #if k.lower() == '--python':
103  # v = 'step%d_%s'%(index,v)
104  cmd += ' ' + k + ' ' + str(v)
105  return cfg, input, cmd
106 
107  def makeStep(self,step,overrides):
109  if len(overrides.keys()) > 0:
110  copyStep=merge([overrides]+[step])
111  return copyStep
112  else:
113  return step
114 
115  def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
116 
117  prefix = self.filesPrefMap[fileNameIn]
118 
119  print "processing", fileNameIn
120 
121  try:
122  _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
123  self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
124  except Exception, e:
125  print "ERROR importing file ", fileNameIn, str(e)
126  return
127 
128  if useInput is not None:
129  print "request for INPUT for ", useInput
130 
131 
132  fromInput={}
133 
134  if useInput:
135  for i in useInput:
136  if ':' in i:
137  (ik,il)=i.split(':')
138  if ik=='all':
139  for k in self.relvalModule.workflows.keys():
140  fromInput[float(k)]=int(il)
141  else:
142  fromInput[float(ik)]=int(il)
143  else:
144  if i=='all':
145  for k in self.relvalModule.workflows.keys():
146  fromInput[float(k)]=0
147  else:
148  fromInput[float(i)]=0
149 
150  if fromScratch:
151  fromScratch=map(float,fromScratch)
152  for num in fromScratch:
153  if num in fromInput:
154  fromInput.pop(num)
155  #overwrite steps
156  if self.overWrite:
157  for p in self.overWrite:
158  self.relvalModule.steps.overwrite(p)
159 
160  #change the origin of dataset on the fly
161  if refRel:
162  if ',' in refRel:
163  refRels=refRel.split(',')
164  if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
165  return
166  self.relvalModule.changeRefRelease(
167  self.relvalModule.steps,
168  zip(self.relvalModule.baseDataSetRelease,refRels)
169  )
170  else:
171  self.relvalModule.changeRefRelease(
172  self.relvalModule.steps,
173  [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
174  )
175 
176 
177  for num, wfInfo in self.relvalModule.workflows.items():
178  commands=[]
179  wfName = wfInfo[0]
180  stepList = wfInfo[1]
181  stepOverrides=wfInfo.overrides
182  # if no explicit name given for the workflow, use the name of step1
183  if wfName.strip() == '': wfName = stepList[0]
184  # option to specialize the wf as the third item in the WF list
185  addTo=None
186  addCom=None
187  if len(wfInfo)>=3:
188  addCom=wfInfo[2]
189  if not type(addCom)==list: addCom=[addCom]
190  #print 'added dict',addCom
191  if len(wfInfo)>=4:
192  addTo=wfInfo[3]
193  #pad with 0
194  while len(addTo)!=len(stepList):
195  addTo.append(0)
196 
197  name=wfName
198  stepIndex=0
199  ranStepList=[]
200 
201  #first resolve INPUT possibilities
202  if num in fromInput:
203  ilevel=fromInput[num]
204  #print num,ilevel
205  for (stepIr,step) in enumerate(reversed(stepList)):
206  stepName=step
207  stepI=(len(stepList)-stepIr)-1
208  #print stepIr,step,stepI,ilevel
209  if stepI>ilevel:
210  #print "ignoring"
211  continue
212  if stepI!=0:
213  testName='__'.join(stepList[0:stepI+1])+'INPUT'
214  else:
215  testName=step+'INPUT'
216  #print "JR",stepI,stepIr,testName,stepList
217  if testName in self.relvalModule.steps.keys():
218  #print "JR",stepI,stepIr
219  stepList[stepI]=testName
220  #pop the rest in the list
221  #print "\tmod prepop",stepList
222  for p in range(stepI):
223  stepList.pop(0)
224  #print "\t\tmod",stepList
225  break
226 
227 
228  for (stepI,step) in enumerate(stepList):
229  stepName=step
230  if self.wm:
231  #cannot put a certain number of things in wm
232  if stepName in [
233  #'HARVEST','HARVESTD','HARVESTDreHLT',
234  'RECODFROMRAWRECO','SKIMD','SKIMCOSD','SKIMDreHLT'
235  ]:
236  continue
237 
238  #replace stepName is needed
239  #if stepName in self.replaceStep
240  if len(name) > 0 : name += '+'
241  #any step can be mirrored with INPUT
242  ## maybe we want too level deep input
243  """
244  if num in fromInput:
245  if step+'INPUT' in self.relvalModule.steps.keys():
246  stepName = step+"INPUT"
247  stepList.remove(step)
248  stepList.insert(stepIndex,stepName)
249  """
250  name += stepName
251  if addCom and (not addTo or addTo[stepIndex]==1):
253  copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
254  cfg, input, opts = self.makeCmd(copyStep)
255  else:
256  cfg, input, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))
257 
258  if input and cfg :
259  msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
260  raise MatrixException(msg)
261 
262  if input:
263  cmd = input
264  if self.noRun:
265  cmd.run=[]
266  else:
267  if cfg:
268  cmd = 'cmsDriver.py '+cfg+' '+opts
269  else:
270  cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
271  if self.wm:
272  cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
273  if self.addCommand:
274  if self.apply:
275  if stepIndex in self.apply or stepName in self.apply:
276  cmd +=' '+self.addCommand
277  else:
278  cmd +=' '+self.addCommand
279  if self.wm and self.revertDqmio=='yes':
280  cmd=cmd.replace('DQMIO','DQM')
281  cmd=cmd.replace('--filetype DQM','')
282  commands.append(cmd)
283  ranStepList.append(stepName)
284  stepIndex+=1
285 
286  self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)
287 
288  return
289 
290 
291  def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
292 
293  if selected:
294  selected=map(float,selected)
295  for matrixFile in self.files:
296 
297  self.reset(what)
298 
299  if self.what != 'all' and self.what not in matrixFile:
300  print "ignoring non-requested file",matrixFile
301  continue
302 
303  if self.what == 'all' and not self.filesDefault[matrixFile]:
304  print "ignoring file not used by default (enable with -w)",matrixFile
305  continue
306 
307  try:
308  self.readMatrix(matrixFile, useInput, refRel, fromScratch)
309  except Exception, e:
310  print "ERROR reading file:", matrixFile, str(e)
311  raise
312 
313  if not self.workFlowSteps: continue
314 
315  dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
316  outFile = open(dataFileName,'w')
317 
318  print "found ", len(self.workFlowSteps.keys()), ' workflows for ', dataFileName
319  ids = self.workFlowSteps.keys()
320  ids.sort()
321  indexAndSteps=[]
322 
323  writtenWF=0
324  for key in ids:
325  if selected and not (key[0] in selected):
326  continue
327  #trick to skip the HImix IB test
328  if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
329  num, name, commands, stepList = self.workFlowSteps[key]
330 
331  wfName,stepNames= name.split('+',1)
332 
333  stepNames=stepNames.replace('+RECODFROMRAWRECO','')
334  stepNames=stepNames.replace('+SKIMCOSD','')
335  stepNames=stepNames.replace('+SKIMD','')
336  if 'HARVEST' in stepNames:
337  #find out automatically what to remove
338  exactb=stepNames.index('+HARVEST')
339  exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
340  stepNames=stepNames.replace(stepNames[exactb:exacte],'')
341  otherSteps = None
342  if '+' in stepNames:
343  step1,otherSteps = stepNames.split('+',1)
344 
345  line = str(num) + ' ++ '+ wfName
346  if otherSteps and not step1Only:
347  line += ' ++ ' +otherSteps.replace('+',',')
348  else:
349  line += ' ++ none'
350  inputInfo=None
351  if not isinstance(commands[0],str):
352  inputInfo=commands[0]
353  if otherSteps:
354  for (i,c) in enumerate(otherSteps.split('+')):
355  #pad with set
356  for p in range(len(indexAndSteps),i+2):
357  indexAndSteps.append(set())
358  indexAndSteps[i+1].add((c,commands[i+1]))
359 
360  if inputInfo :
361  #skip the samples from INPUT when step1Only is on
362  if step1Only: continue
363  line += ' ++ REALDATA: '+inputInfo.dataSet
364  if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
365  line += ', FILES: ' +str(inputInfo.files)
366  line += ', EVENTS: '+str(inputInfo.events)
367  if inputInfo.label!='':
368  line += ', LABEL: ' +inputInfo.label
369  line += ', LOCATION:'+inputInfo.location
370  line += ' @@@'
371  else:
372  line += ' @@@ '+commands[0]
373  if self.revertDqmio=='yes':
374  line=line.replace('DQMIO','DQM')
375  writtenWF+=1
376  outFile.write(line+'\n')
377 
378 
379  outFile.write('\n'+'\n')
380  if step1Only: continue
381 
382  for (index,s) in enumerate(indexAndSteps):
383  for (stepName,cmd) in s:
384  stepIndex=index+1
385  if 'dasquery.log' in cmd: continue
386  line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
387  if self.revertDqmio=='yes':
388  line=line.replace('DQMIO','DQM')
389  outFile.write(line+'\n')
390  outFile.write('\n'+'\n')
391  outFile.close()
392  print "wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name
393  return
394 
395 
396  def showWorkFlows(self, selected=None, extended=True):
397  if selected: selected = map(float,selected)
398  maxLen = 100 # for summary, limit width of output
399  fmt1 = "%-6s %-35s [1]: %s ..."
400  fmt2 = " %35s [%d]: %s ..."
401  print "\nfound a total of ", len(self.workFlows), ' workflows:'
402  if selected:
403  print " of which the following", len(selected), 'were selected:'
404  #-ap for now:
405  maxLen = -1 # for individual listing, no limit on width
406  fmt1 = "%-6s %-35s [1]: %s "
407  fmt2 = " %35s [%d]: %s"
408 
409  N=[]
410  for wf in self.workFlows:
411  if selected and float(wf.numId) not in selected: continue
412  if extended: print ''
413  #pad with zeros
414  for i in range(len(N),len(wf.cmds)): N.append(0)
415  N[len(wf.cmds)-1]+=1
416  wfName, stepNames = wf.nameId.split('+',1)
417  for i,s in enumerate(wf.cmds):
418  if extended:
419  if i==0:
420  print fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen])
421  else:
422  print fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen])
423  else:
424  print "%-6s %-35s "% (wf.numId, stepNames)
425  break
426  print ''
427  for i,n in enumerate(N):
428  if n: print n,'workflows with',i+1,'steps'
429 
430  return
431 
432  def createWorkFlows(self, fileNameIn):
433 
434  prefixIn = self.filesPrefMap[fileNameIn]
435 
436  # get through the list of items and update the requested workflows only
437  keyList = self.workFlowSteps.keys()
438  ids = []
439  for item in keyList:
440  id, pref = item
441  if pref != prefixIn : continue
442  ids.append(id)
443  ids.sort()
444  for key in ids:
445  val = self.workFlowSteps[(key,prefixIn)]
446  num, name, commands, stepList = val
447  nameId = str(num)+'_'+name
448  if nameId in self.nameList:
449  print "==> duplicate name found for ", nameId
450  print ' keeping : ', self.nameList[nameId]
451  print ' ignoring : ', val
452  else:
453  self.nameList[nameId] = val
454 
455  self.workFlows.append(WorkFlow(num, name, commands=commands))
456 
457  return
458 
459  def prepare(self, useInput=None, refRel='', fromScratch=None):
460 
461  for matrixFile in self.files:
462  if self.what != 'all' and self.what not in matrixFile:
463  print "ignoring non-requested file",matrixFile
464  continue
465  if self.what == 'all' and not self.filesDefault[matrixFile]:
466  print "ignoring",matrixFile,"from default matrix"
467  continue
468 
469  try:
470  self.readMatrix(matrixFile, useInput, refRel, fromScratch)
471  except Exception, e:
472  print "ERROR reading file:", matrixFile, str(e)
473  raise
474 
475  try:
476  self.createWorkFlows(matrixFile)
477  except Exception, e:
478  print "ERROR creating workflows :", str(e)
479  raise
480 
481 
482  def show(self, selected=None, extended=True):
483 
484  self.showWorkFlows(selected,extended)
485  print '\n','-'*80,'\n'
486 
487 
488  def updateDB(self):
489 
490  import pickle
491  pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
492 
493  return
494 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
tuple zip
Definition: archive.py:476
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77