CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
class MatrixException(Exception):
    """Error raised while the workflow matrix is being assembled.

    NOTE(review): the ``class`` statement is absent from this listing. The
    name is taken from the ``raise MatrixException(msg)`` call further down
    in this file; the ``Exception`` base is assumed so that the object can
    be raised -- confirm against the original module.
    """

    def __init__(self, msg):
        """Keep *msg*, the human-readable description, in ``self.msg``."""
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        """Return the message supplied at construction time."""
        return self.msg
13 
14 # ================================================================================
15 
17 
18  def __init__(self, opt):
19 
20  self.reset(opt.what)
21 
22  self.wm=opt.wmcontrol
23  self.revertDqmio=opt.revertDqmio
24  self.addCommand=opt.command
25  self.apply=opt.apply
26  self.commandLineWf=opt.workflow
27  self.overWrite=opt.overWrite
28 
29  self.noRun = opt.noRun
30  return
31 
32  def reset(self, what='all'):
33 
34  self.what = what
35 
36  #a bunch of information, but not yet the WorkFlow object
37  self.workFlowSteps = {}
38  #the actual WorkFlow objects
39  self.workFlows = []
40  self.nameList = {}
41 
42  self.filesPrefMap = {'relval_standard' : 'std-' ,
43  'relval_highstats': 'hi-' ,
44  'relval_pileup': 'PU-' ,
45  'relval_generator': 'gen-' ,
46  'relval_production': 'prod-' ,
47  'relval_ged': 'ged-',
48  'relval_upgrade':'upg-',
49  'relval_identity':'id-',
50  'relval_machine': 'mach-'
51  }
52 
53  self.files = ['relval_standard' ,
54  'relval_highstats',
55  'relval_pileup',
56  'relval_generator',
57  'relval_production',
58  'relval_ged',
59  'relval_upgrade',
60  'relval_identity',
61  'relval_machine'
62  ]
63 
64  self.relvalModule = None
65 
66  return
67 
68  def makeCmd(self, step):
69 
70  cmd = ''
71  cfg = None
72  input = None
73  for k,v in step.items():
74  if 'no_exec' in k : continue # we want to really run it ...
75  if k.lower() == 'cfg':
76  cfg = v
77  continue # do not append to cmd, return separately
78  if k.lower() == 'input':
79  input = v
80  continue # do not append to cmd, return separately
81 
82  #chain the configs
83  #if k.lower() == '--python':
84  # v = 'step%d_%s'%(index,v)
85  cmd += ' ' + k + ' ' + str(v)
86  return cfg, input, cmd
87 
88  def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
89 
90  prefix = self.filesPrefMap[fileNameIn]
91 
92  print "processing ", fileNameIn
93 
94  try:
95  _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
96  self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
97  except Exception, e:
98  print "ERROR importing file ", fileNameIn, str(e)
99  return
100 
101  print "request for INPUT for ", useInput
102 
103 
104  fromInput={}
105 
106  if useInput:
107  for i in useInput:
108  if ':' in i:
109  (ik,il)=i.split(':')
110  if ik=='all':
111  for k in self.relvalModule.workflows.keys():
112  fromInput[float(k)]=int(il)
113  else:
114  fromInput[float(ik)]=int(il)
115  else:
116  if i=='all':
117  for k in self.relvalModule.workflows.keys():
118  fromInput[float(k)]=0
119  else:
120  fromInput[float(i)]=0
121 
122  if fromScratch:
123  fromScratch=map(float,fromScratch)
124  for num in fromScratch:
125  if num in fromInput:
126  fromInput.pop(num)
127  #overwrite steps
128  if self.overWrite:
129  for p in self.overWrite:
130  self.relvalModule.steps.overwrite(p)
131 
132  #change the origin of dataset on the fly
133  if refRel:
134  if ',' in refRel:
135  refRels=refRel.split(',')
136  if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
137  return
138  self.relvalModule.changeRefRelease(
139  self.relvalModule.steps,
140  zip(self.relvalModule.baseDataSetRelease,refRels)
141  )
142  else:
143  self.relvalModule.changeRefRelease(
144  self.relvalModule.steps,
145  [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
146  )
147 
148 
149  for num, wfInfo in self.relvalModule.workflows.items():
150  commands=[]
151  wfName = wfInfo[0]
152  stepList = wfInfo[1]
153  # if no explicit name given for the workflow, use the name of step1
154  if wfName.strip() == '': wfName = stepList[0]
155  # option to specialize the wf as the third item in the WF list
156  addTo=None
157  addCom=None
158  if len(wfInfo)>=3:
159  addCom=wfInfo[2]
160  if not type(addCom)==list: addCom=[addCom]
161  #print 'added dict',addCom
162  if len(wfInfo)>=4:
163  addTo=wfInfo[3]
164  #pad with 0
165  while len(addTo)!=len(stepList):
166  addTo.append(0)
167 
168  name=wfName
169  stepIndex=0
170  ranStepList=[]
171 
172  #first resolve INPUT possibilities
173  if num in fromInput:
174  ilevel=fromInput[num]
175  #print num,ilevel
176  for (stepIr,step) in enumerate(reversed(stepList)):
177  stepName=step
178  stepI=(len(stepList)-stepIr)-1
179  #print stepIr,step,stepI,ilevel
180  if stepI>ilevel:
181  #print "ignoring"
182  continue
183  if stepI!=0:
184  testName='__'.join(stepList[0:stepI+1])+'INPUT'
185  else:
186  testName=step+'INPUT'
187  #print "JR",stepI,stepIr,testName,stepList
188  if testName in self.relvalModule.steps.keys():
189  #print "JR",stepI,stepIr
190  stepList[stepI]=testName
191  #pop the rest in the list
192  #print "\tmod prepop",stepList
193  for p in range(stepI):
194  stepList.pop(0)
195  #print "\t\tmod",stepList
196  break
197 
198 
199  for (stepI,step) in enumerate(stepList):
200  stepName=step
201  if self.wm:
202  #cannot put a certain number of things in wm
203  if stepName in [
204  #'HARVEST','HARVESTD','HARVESTDreHLT',
205  'RECODFROMRAWRECO','SKIMD','SKIMCOSD','SKIMDreHLT'
206  ]:
207  continue
208 
209  #replace stepName is needed
210  #if stepName in self.replaceStep
211  if len(name) > 0 : name += '+'
212  #any step can be mirrored with INPUT
213  ## maybe we want too level deep input
214  """
215  if num in fromInput:
216  if step+'INPUT' in self.relvalModule.steps.keys():
217  stepName = step+"INPUT"
218  stepList.remove(step)
219  stepList.insert(stepIndex,stepName)
220  """
221  name += stepName
222 
223  if addCom and (not addTo or addTo[stepIndex]==1):
225  copyStep=merge(addCom+[self.relvalModule.steps[stepName]])
226  cfg, input, opts = self.makeCmd(copyStep)
227  else:
228  cfg, input, opts = self.makeCmd(self.relvalModule.steps[stepName])
229 
230  if input and cfg :
231  msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
232  raise MatrixException(msg)
233 
234  if input:
235  cmd = input
236  if self.noRun:
237  cmd.run=[]
238  else:
239  if cfg:
240  cmd = 'cmsDriver.py '+cfg+' '+opts
241  else:
242  cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
243  if self.wm:
244  cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
245  if self.addCommand:
246  if self.apply:
247  if stepIndex in self.apply or stepName in self.apply:
248  cmd +=' '+self.addCommand
249  else:
250  cmd +=' '+self.addCommand
251  if self.wm and self.revertDqmio=='yes':
252  cmd=cmd.replace('DQMIO','DQM')
253  cmd=cmd.replace('--filetype DQM','')
254  commands.append(cmd)
255  ranStepList.append(stepName)
256  stepIndex+=1
257 
258  self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)
259 
260  return
261 
262 
263  def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
264 
265  if selected:
266  selected=map(float,selected)
267  for matrixFile in self.files:
268 
269  self.reset(what)
270 
271  if self.what != 'all' and self.what not in matrixFile:
272  print "ignoring non-requested file",matrixFile
273  continue
274 
275  try:
276  self.readMatrix(matrixFile, useInput, refRel, fromScratch)
277  except Exception, e:
278  print "ERROR reading file:", matrixFile, str(e)
279  raise
280 
281  if not self.workFlowSteps: continue
282 
283  dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
284  outFile = open(dataFileName,'w')
285 
286  print "found ", len(self.workFlowSteps.keys()), ' workflows for ', dataFileName
287  ids = self.workFlowSteps.keys()
288  ids.sort()
289  indexAndSteps=[]
290 
291  writtenWF=0
292  for key in ids:
293  if selected and not (key[0] in selected):
294  continue
295  #trick to skip the HImix IB test
296  if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
297  num, name, commands, stepList = self.workFlowSteps[key]
298 
299  wfName,stepNames= name.split('+',1)
300 
301  stepNames=stepNames.replace('+RECODFROMRAWRECO','')
302  stepNames=stepNames.replace('+SKIMCOSD','')
303  stepNames=stepNames.replace('+SKIMD','')
304  if 'HARVEST' in stepNames:
305  #find out automatically what to remove
306  exactb=stepNames.index('+HARVEST')
307  exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
308  stepNames=stepNames.replace(stepNames[exactb:exacte],'')
309  otherSteps = None
310  if '+' in stepNames:
311  step1,otherSteps = stepNames.split('+',1)
312 
313  line = str(num) + ' ++ '+ wfName
314  if otherSteps and not step1Only:
315  line += ' ++ ' +otherSteps.replace('+',',')
316  else:
317  line += ' ++ none'
318  inputInfo=None
319  if not isinstance(commands[0],str):
320  inputInfo=commands[0]
321  if otherSteps:
322  for (i,c) in enumerate(otherSteps.split('+')):
323  #pad with set
324  for p in range(len(indexAndSteps),i+2):
325  indexAndSteps.append(set())
326  indexAndSteps[i+1].add((c,commands[i+1]))
327 
328  if inputInfo :
329  #skip the samples from INPUT when step1Only is on
330  if step1Only: continue
331  line += ' ++ REALDATA: '+inputInfo.dataSet
332  if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
333  line += ', FILES: ' +str(inputInfo.files)
334  line += ', EVENTS: '+str(inputInfo.events)
335  if inputInfo.label!='':
336  line += ', LABEL: ' +inputInfo.label
337  line += ', LOCATION:'+inputInfo.location
338  line += ' @@@'
339  else:
340  line += ' @@@ '+commands[0]
341  if self.revertDqmio=='yes':
342  line=line.replace('DQMIO','DQM')
343  writtenWF+=1
344  outFile.write(line+'\n')
345 
346 
347  outFile.write('\n'+'\n')
348  if step1Only: continue
349 
350  for (index,s) in enumerate(indexAndSteps):
351  for (stepName,cmd) in s:
352  stepIndex=index+1
353  if 'dasquery.log' in cmd: continue
354  line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
355  if self.revertDqmio=='yes':
356  line=line.replace('DQMIO','DQM')
357  outFile.write(line+'\n')
358  outFile.write('\n'+'\n')
359  outFile.close()
360  print "wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name
361  return
362 
363 
364  def showWorkFlows(self, selected=None, extended=True):
365  if selected: selected = map(float,selected)
366  maxLen = 100 # for summary, limit width of output
367  fmt1 = "%-6s %-35s [1]: %s ..."
368  fmt2 = " %35s [%d]: %s ..."
369  print "\nfound a total of ", len(self.workFlows), ' workflows:'
370  if selected:
371  print " of which the following", len(selected), 'were selected:'
372  #-ap for now:
373  maxLen = -1 # for individual listing, no limit on width
374  fmt1 = "%-6s %-35s [1]: %s "
375  fmt2 = " %35s [%d]: %s"
376 
377  N=[]
378  for wf in self.workFlows:
379  if selected and float(wf.numId) not in selected: continue
380  if extended: print ''
381  #pad with zeros
382  for i in range(len(N),len(wf.cmds)): N.append(0)
383  N[len(wf.cmds)-1]+=1
384  wfName, stepNames = wf.nameId.split('+',1)
385  for i,s in enumerate(wf.cmds):
386  if extended:
387  if i==0:
388  print fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen])
389  else:
390  print fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen])
391  else:
392  print "%-6s %-35s "% (wf.numId, stepNames)
393  break
394  print ''
395  for i,n in enumerate(N):
396  if n: print n,'workflows with',i+1,'steps'
397 
398  return
399 
400  def createWorkFlows(self, fileNameIn):
401 
402  prefixIn = self.filesPrefMap[fileNameIn]
403 
404  # get through the list of items and update the requested workflows only
405  keyList = self.workFlowSteps.keys()
406  ids = []
407  for item in keyList:
408  id, pref = item
409  if pref != prefixIn : continue
410  ids.append(id)
411  ids.sort()
412  for key in ids:
413  val = self.workFlowSteps[(key,prefixIn)]
414  num, name, commands, stepList = val
415  nameId = str(num)+'_'+name
416  if nameId in self.nameList:
417  print "==> duplicate name found for ", nameId
418  print ' keeping : ', self.nameList[nameId]
419  print ' ignoring : ', val
420  else:
421  self.nameList[nameId] = val
422 
423  self.workFlows.append(WorkFlow(num, name, commands=commands))
424 
425  return
426 
427  def prepare(self, useInput=None, refRel='', fromScratch=None):
428 
429  for matrixFile in self.files:
430  if self.what != 'all' and self.what not in matrixFile:
431  print "ignoring non-requested file",matrixFile
432  continue
433  if self.what == 'all' and ('upgrade' in matrixFile):
434  print "ignoring",matrixFile,"from default matrix"
435  continue
436 
437  try:
438  self.readMatrix(matrixFile, useInput, refRel, fromScratch)
439  except Exception, e:
440  print "ERROR reading file:", matrixFile, str(e)
441  raise
442 
443  try:
444  self.createWorkFlows(matrixFile)
445  except Exception, e:
446  print "ERROR creating workflows :", str(e)
447  raise
448 
449 
450  def show(self, selected=None, extended=True):
451 
452  self.showWorkFlows(selected,extended)
453  print '\n','-'*80,'\n'
454 
455 
456  def updateDB(self):
457 
458  import pickle
459  pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
460 
461  return
462 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
tuple zip
Definition: archive.py:476
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77