CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
class MatrixException(Exception):
    """Error carrying a plain text message.

    NOTE(review): the ``class`` line is missing from this listing (source
    line 8 was dropped); deriving from Exception is assumed because the
    class is used in a ``raise`` statement -- confirm against the real file.
    """

    def __init__(self, msg):
        # Forward the message so that e.args is populated as for any
        # built-in exception; self.msg is kept for existing readers.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Initialise the reader from an options object.

    opt -- object exposing the attributes ``what``, ``wmcontrol``,
           ``revertDqmio``, ``command``, ``apply``, ``workflow``,
           ``overWrite`` and ``noRun``; each one is copied onto self.
    """
    # reset() comes first: it (re)creates the containers and file tables
    self.reset(opt.what)

    # (attribute set on self, attribute read from opt), in the original order
    copied_options = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for own_name, opt_name in copied_options:
        setattr(self, own_name, getattr(opt, opt_name))
31 
def reset(self, what='all'):
    """Drop everything read so far and restore the default file tables.

    what -- selection string stored as self.what (default 'all')
    """
    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # (matrix file, prefix) pairs; the order here is the order of self.files
    known_files = (
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_upgrade', 'upg-'),
        ('relval_identity', 'id-'),
    )
    self.filesPrefMap = dict(known_files)
    self.files = [file_name for file_name, _prefix in known_files]

    # module of the matrix file read last; nothing has been read yet
    self.relvalModule = None
65 
def makeCmd(self, step):
    """Split a step definition into cfg, input and the remaining options.

    step -- mapping of option name to value

    Returns the tuple (cfg, input, options): the values stored under the
    keys 'cfg' and 'input' (matched case-insensitively, None when absent)
    and a string holding every other entry as ' <key> <value>'.  Keys
    containing 'no_exec' are dropped.
    """
    config = None
    source = None
    pieces = []
    for key, value in step.items():
        if 'no_exec' in key:
            # we want to really run it ...
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            # do not append to the options, returned separately
            config = value
        elif lowered == 'input':
            # do not append to the options, returned separately
            source = value
        else:
            pieces.append(' ' + key + ' ' + str(value))
    return config, source, ''.join(pieces)
85 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one matrix file and turn its workflows into command lists.

    The module Configuration.PyReleaseValidation.<fileNameIn> is imported
    and kept in self.relvalModule.  For every entry of its ``workflows``
    mapping, self.workFlowSteps receives
    ``(num, prefix) -> (num, name, commands, ranStepList)``.

    fileNameIn  -- key of self.filesPrefMap, also the module name
    useInput    -- iterable of strings '<wf>', '<wf>:<level>', 'all' or
                   'all:<level>'; for these workflows the steps up to index
                   <level> (default 0) may be replaced by one '...INPUT' step
    refRel      -- reference release, or a comma separated list with one
                   entry per element of the module's baseDataSetRelease;
                   passed on to the module's changeRefRelease()
    fromScratch -- workflow numbers for which useInput is ignored

    Returns None.  An import failure and a wrong number of reference
    releases are reported on stdout and end the call early.
    Raises MatrixException when a step yields both a cfg and an input.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing  %s" % (fileNameIn,))

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, e))
        return
    relval = self.relvalModule

    print("request for INPUT for  %s" % (useInput,))

    # workflow number -> deepest step index that may be taken from INPUT
    fromInput = {}
    for request in useInput or ():
        wfId, colon, level = request.partition(':')
        level = int(level) if colon else 0
        if wfId == 'all':
            for k in relval.workflows.keys():
                fromInput[float(k)] = level
        else:
            fromInput[float(wfId)] = level

    for wfNum in fromScratch or ():
        fromInput.pop(float(wfNum), None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            relval.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = relval.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # previously a silent return; say why the file is skipped
                print("ERROR: got %d reference releases but %d are needed, skipping %s"
                      % (len(refRels), len(baseReleases), fileNameIn))
                return
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        relval.changeRefRelease(relval.steps, releasePairs)

    # steps that cannot be put in wm
    wmExcluded = ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

    for num, wfInfo in relval.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # Work on a copy: the INPUT resolution below edits the list and must
        # not change the workflow definition stored in the imported module.
        stepList = list(wfInfo[1])
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
            if len(wfInfo) >= 4:
                # copy, then pad with 0 up to the number of steps; a list that
                # is already longer is left alone (it used to loop forever)
                addTo = list(wfInfo[3])
                addTo.extend([0] * (len(stepList) - len(addTo)))

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: starting from the deepest
        # allowed step, look for a step named after the chain + 'INPUT'
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                testName = '__'.join(stepList[:stepI + 1]) + 'INPUT'
                if testName in relval.steps.keys():
                    # the INPUT step stands in for steps 0..stepI
                    stepList[:stepI + 1] = [testName]
                    break

        for stepName in stepList:
            if self.wm and stepName in wmExcluded:
                continue

            if len(name) > 0:
                name += '+'
            name += stepName

            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): source line 222 is missing from the listing
                # this was recovered from; the import below is presumed to
                # be what stood there -- confirm against the real file.
                from Configuration.PyReleaseValidation.relval_steps import merge
                copyStep = merge(addCom + [relval.steps[stepName]])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(relval.steps[stepName])

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input description object itself is stored as "command"
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    # without self.apply the extra command goes to every step
                    if (not self.apply or stepIndex in self.apply
                            or stepName in self.apply):
                        cmd += ' ' + self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMROOT', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
259 
260 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write a cmsDriver_<name>_hlt.txt summary for each matrix file.

    For every file of self.files the reader is reset, the file is read
    with readMatrix() and one line per workflow is written, followed
    (unless step1Only) by one 'STEPn' line per distinct later step.

    useInput, refRel, fromScratch -- passed on to readMatrix()
    what      -- only files whose name contains this string are used
                 ('all' takes every file)
    step1Only -- write only the first step of each workflow and skip the
                 workflows that start from real-data input
    selected  -- workflow numbers to keep; None or empty keeps all

    Exceptions from readMatrix() are reported on stdout and re-raised.
    """
    if selected:
        # a set: membership is tested once per workflow
        selected = set(map(float, selected))

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, e))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'
        # indexAndSteps[i] collects the (step name, command) pairs seen at
        # position i of any workflow; position 0 is never filled
        indexAndSteps = []
        writtenWF = 0

        # 'with' closes the file on every path, including step1Only
        with open(dataFileName, 'w') as outFile:
            print("found  %s  workflows for  %s"
                  % (len(self.workFlowSteps), dataFileName))

            for key in sorted(self.workFlowSteps.keys()):
                if selected and key[0] not in selected:
                    continue
                # trick to skip the HImix IB test
                if key[0] in (203.1, 204.1, 205.1, 4.51, 4.52):
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]

                wfName, stepNames = name.split('+', 1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO', '')
                stepNames = stepNames.replace('+SKIMCOSD', '')
                stepNames = stepNames.replace('+SKIMD', '')
                if '+HARVEST' in stepNames:
                    # cut out the first step whose name starts with HARVEST,
                    # from its leading '+' up to the next '+' (or the end)
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.find('+', exactb + 1)
                    if exacte < 0:
                        exacte = len(stepNames)
                    stepNames = stepNames[:exactb] + stepNames[exacte:]
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'
                # a first "command" that is not a string is an input description
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for i, c in enumerate(otherSteps.split('+')):
                        # pad with sets up to position i+1
                        while len(indexAndSteps) < i + 2:
                            indexAndSteps.append(set())
                        indexAndSteps[i + 1].add((c, commands[i + 1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: ' + inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: ' + str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:' + inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ ' + commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMROOT', 'DQM')
                writtenWF += 1
                outFile.write(line + '\n')

            outFile.write('\n' + '\n')

            if not step1Only:
                for index, stepSet in enumerate(indexAndSteps):
                    for stepName, cmd in stepSet:
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (index + 1,) + stepName + ' @@@ ' + cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMROOT', 'DQM')
                        outFile.write(line + '\n')
                outFile.write('\n' + '\n')

        # as before, no summary line is printed in step1Only mode
        if step1Only:
            continue
        print("wrote  %s  workflow%s  to  %s"
              % (writtenWF, 's' if writtenWF != 1 else '', dataFileName))
    return
360 
361 
def showWorkFlows(self, selected=None, extended=True):
    """Print the workflows held in self.workFlows.

    selected -- workflow numbers to list; None or empty lists all of them
                and truncates each command to 100 characters
    extended -- when true print every command of a workflow, otherwise
                only one line with its number and step names

    A count of workflows per number of steps is printed at the end.
    """
    if selected:
        # a real list: its length is printed and it is searched repeatedly
        selected = [float(x) for x in selected]
    maxLen = 100 # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of  %s  workflows:" % (len(self.workFlows),))
    if selected:
        print(" of which the following %s were selected:" % (len(selected),))
        #-ap for now:
        maxLen = -1 # for individual listing, no limit on width
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

    # N[k] = number of listed workflows having k+1 commands
    N = []
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        nCmds = len(wf.cmds)
        if nCmds:
            # pad with zeros, then count; a workflow without commands is not
            # counted (it used to hit N[-1])
            N.extend([0] * (nCmds - len(N)))
            N[nCmds - 1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        if extended:
            for i, s in enumerate(wf.cmds):
                # the slice cuts at maxLen, or drops the padding blank for -1
                shown = (str(s) + ' ')[:maxLen]
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, shown))
                else:
                    print(fmt2 % (' ', i + 1, shown))
        elif nCmds:
            print("%-6s %-35s " % (wf.numId, stepNames))
    print('')
    for i, n in enumerate(N):
        if n:
            print("%s workflows with %s steps" % (n, i + 1))

    return
397 
def createWorkFlows(self, fileNameIn):
    """Create WorkFlow objects for the workflows read from one matrix file.

    fileNameIn -- key of self.filesPrefMap; only the entries of
                  self.workFlowSteps stored under that file's prefix are
                  used, in increasing order of workflow number

    A WorkFlow is appended to self.workFlows for each entry; the first
    entry seen for a given '<num>_<name>' is recorded in self.nameList and
    later ones with the same name are reported on stdout.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys()
                 if pref == prefixIn)
    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num) + '_' + name
        if nameId in self.nameList:
            print("==> duplicate name found for  %s" % (nameId,))
            print(' keeping :  %s' % (self.nameList[nameId],))
            print(' ignoring :  %s' % (val,))
        else:
            self.nameList[nameId] = val

        # NOTE(review): indentation is lost in the listing this was recovered
        # from; the append is taken to run for every entry (duplicates
        # included) -- confirm against the real file.
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
424 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its WorkFlow objects.

    Files of self.files are skipped when self.what is not 'all' and is not
    part of the file name, or when self.what is 'all' and the name contains
    'upgrade'.  For the others readMatrix() and createWorkFlows() are
    called in turn.

    useInput, refRel, fromScratch -- passed on to readMatrix()

    Any exception from the two calls is reported on stdout and re-raised.
    """
    for matrixFile in self.files:
        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue
        if self.what == 'all' and 'upgrade' in matrixFile:
            print("ignoring %s from default matrix" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, e))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % (e,))
            raise
446 
447 
def show(self, selected=None, extended=True):
    """Print the workflow listing followed by a separator line.

    selected, extended -- passed on to showWorkFlows()
    """
    self.showWorkFlows(selected, extended)
    # Single-argument form, valid as a Python 2 statement and as a Python 3
    # call; the text is what "print '\n','-'*80,'\n'" produced: an empty
    # line, 80 dashes followed by a blank, and an empty line.
    print('\n' + '-' * 80 + ' \n')
452 
453 
def updateDB(self):
    """Pickle self.workFlows into the file 'theMatrix.pkl' in the current directory."""
    import pickle

    # Binary mode is what pickle expects, and 'with' closes the file so the
    # data is flushed to disk before the method returns.
    with open('theMatrix.pkl', 'wb') as pklFile:
        pickle.dump(self.workFlows, pklFile)

    return
460 
Definition: merge.py:1
revertDqmio
maybe we want two-level-deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77