CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
def __init__(self, msg):
    """Remember the text describing the failure.

    msg -- message kept on the instance as ``self.msg``.
    """
    self.msg = msg
def __str__(self):
    """Return the message stored on the instance (``self.msg``)."""
    text = self.msg
    return text
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Initialise the reader from an options object.

    opt -- object providing the attributes ``what``, ``wmcontrol``,
           ``revertDqmio``, ``command``, ``apply``, ``workflow``,
           ``overWrite`` and ``noRun``.
    """
    self.reset(opt.what)

    # (attribute set on self, attribute read from opt), in the original order
    copied = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for ownName, optName in copied:
        setattr(self, ownName, getattr(opt, optName))
    return
31 
def reset(self, what='all'):
    """Drop everything collected so far and rebuild the matrix file tables.

    what -- value stored as ``self.what`` (defaults to 'all').
    """
    # (matrix file name, prefix) pairs in the order the files are listed
    knownFiles = [
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_extendedgen', 'genExt-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_upgrade', 'upg-'),
        ('relval_identity', 'id-'),
        ('relval_machine', 'mach-'),
    ]

    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    self.filesPrefMap = dict(knownFiles)
    self.files = [fileName for fileName, _prefix in knownFiles]

    self.relvalModule = None

    return
69 
def makeCmd(self, step):
    """Split a step mapping into its cfg, its input and an option string.

    step -- mapping of option name to value.

    Returns ``(cfg, input, cmd)``: the values found under the keys 'cfg' and
    'input' (compared case-insensitively, None when absent) and every other
    option rendered as ' <key> <value>' and concatenated.  Keys containing
    'no_exec' are dropped.
    """
    cfgValue = None
    inputValue = None
    pieces = []
    for key, value in step.items():
        if 'no_exec' in key:
            # we want to really run it ...
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            # returned separately, not part of the option string
            cfgValue = value
        elif lowered == 'input':
            # returned separately, not part of the option string
            inputValue = value
        else:
            pieces.append(' ' + key + ' ' + str(value))
    return cfgValue, inputValue, ''.join(pieces)
89 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and record the commands of its workflows.

    For every entry of the module's ``workflows`` mapping the step list is
    turned into a list of commands; the result is stored in
    ``self.workFlowSteps[(num, prefix)]`` as
    ``(num, name, commands, ranStepList)``.

    fileNameIn  -- key of ``self.filesPrefMap``; also the module name below
                   ``Configuration.PyReleaseValidation``.
    useInput    -- optional iterable of '<id>' or '<id>:<level>' strings
                   ('<id>' may be 'all') naming the workflows whose leading
                   steps may be replaced by a matching '...INPUT' step.
    refRel      -- optional release string, or a comma-separated list with
                   one entry per ``baseDataSetRelease`` item, handed to the
                   module's ``changeRefRelease``.
    fromScratch -- optional iterable of workflow ids removed again from the
                   ``useInput`` selection.

    Returns None.  An import failure or a ``refRel`` list of the wrong length
    is reported on stdout and ends the call early.
    Raises MatrixException when a step provides both a cfg and an input.
    """
    prefix = self.filesPrefMap[fileNameIn]
    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn

    print("processing  %s" % fileNameIn)

    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
        return

    print("request for INPUT for  %s" % (useInput,))

    # workflow id -> deepest step index that may be replaced by an INPUT step
    fromInput = {}
    for item in useInput or []:
        if ':' in item:
            wfId, level = item.split(':')
            level = int(level)
        else:
            wfId, level = item, 0
        if wfId == 'all':
            for k in self.relvalModule.workflows.keys():
                fromInput[float(k)] = level
        else:
            fromInput[float(wfId)] = level

    # a list comprehension (not map) so the ids are a real list on Python 3 too
    for num in [float(x) for x in fromScratch or []]:
        fromInput.pop(num, None)

    # overwrite steps
    for p in self.overWrite or []:
        self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # previously a silent return; say why nothing was read
                print("ERROR: got %d reference releases for %d base releases, skipping %s"
                      % (len(refRels), len(baseReleases), fileNameIn))
                return
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        self.relvalModule.changeRefRelease(self.relvalModule.steps, releasePairs)

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # NOTE: this is the module's own list; the INPUT handling below edits it in place
        stepList = wfInfo[1]
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
        if len(wfInfo) >= 4:
            addTo = wfInfo[3]
            # pad with 0; '<' (not '!=') so an over-long list cannot loop forever
            while len(addTo) < len(stepList):
                addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: walk backwards from the deepest
        # allowed step and use the first '...INPUT' step that exists
        if num in fromInput:
            ilevel = fromInput[num]
            for (stepIr, step) in enumerate(reversed(stepList)):
                stepI = (len(stepList) - stepIr) - 1
                if stepI > ilevel:
                    continue
                if stepI != 0:
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                else:
                    testName = step + 'INPUT'
                if testName in self.relvalModule.steps.keys():
                    stepList[stepI] = testName
                    # drop the steps the INPUT step stands in for
                    del stepList[:stepI]
                    break

        for step in stepList:
            stepName = step
            # cannot put a certain number of things in wm
            if self.wm and stepName in ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT'):
                continue

            if len(name) > 0:
                name += '+'
            # any step can be mirrored with INPUT
            # (maybe we want two-level-deep input)
            name += stepName

            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): 'merge' has no visible import in this listing
                # (source line 226 is missing) -- presumably imported there; confirm.
                copyStep = merge(addCom + [self.relvalModule.steps[stepName]])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(self.relvalModule.steps[stepName])

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input object itself takes the place of the command
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    # without an 'apply' selection the extra command goes to every step
                    if not self.apply or stepIndex in self.apply or stepName in self.apply:
                        cmd += ' ' + self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMIO', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
263 
264 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write one 'cmsDriver_<file>_hlt.txt' summary per requested matrix file.

    For each entry of ``self.files`` the reader is reset, the matrix is read
    with ``readMatrix`` and, if any workflow was found, a text file is written
    with one line per workflow followed by one 'STEPn' line per distinct
    follow-up step command.

    useInput, refRel, fromScratch -- passed through to ``readMatrix``.
    what      -- passed to ``reset``; files whose name does not contain it are
                 skipped unless it is 'all'.
    step1Only -- when true only the first step of each workflow is listed,
                 workflows starting from an input object are left out and no
                 'STEPn' lines are written.
    selected  -- optional iterable of workflow ids; others are skipped.

    Returns None.  Exceptions from ``readMatrix`` are reported and re-raised.
    """
    if selected:
        # a list (not a map object) because it is searched once per workflow
        selected = [float(s) for s in selected]

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'
        print("found  %s  workflows for  %s" % (len(self.workFlowSteps), dataFileName))

        # indexAndSteps[k] collects the distinct (step name, command) pairs of step k+1
        indexAndSteps = []
        writtenWF = 0

        # 'with' closes the file on every path, including step1Only and errors
        with open(dataFileName, 'w') as outFile:
            for key in sorted(self.workFlowSteps.keys()):
                if selected and key[0] not in selected:
                    continue
                # trick to skip the HImix IB test
                if key[0] in (203.1, 204.1, 205.1, 4.51, 4.52):
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]
                if not commands:
                    # no step was kept for this workflow, nothing to list
                    continue

                wfName, stepNames = name.split('+', 1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO', '')
                stepNames = stepNames.replace('+SKIMCOSD', '')
                stepNames = stepNames.replace('+SKIMD', '')
                # find out automatically which '+HARVEST...' step to remove;
                # find() instead of index() so a leading HARVEST step cannot raise
                harvestAt = stepNames.find('+HARVEST')
                if harvestAt >= 0:
                    harvestEnd = stepNames.find('+', harvestAt + 1)
                    if harvestEnd < 0:
                        harvestEnd = len(stepNames)
                    stepNames = stepNames.replace(stepNames[harvestAt:harvestEnd], '')

                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'

                # a first command that is not a string is an input description object
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]

                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with empty sets up to the slot used below
                        for p in range(len(indexAndSteps), i + 2):
                            indexAndSteps.append(set())
                        indexAndSteps[i + 1].add((c, commands[i + 1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: ' + inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: ' + str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:' + inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ ' + commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMIO', 'DQM')
                writtenWF += 1
                outFile.write(line + '\n')

            outFile.write('\n' + '\n')

            if not step1Only:
                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index + 1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line + '\n')
                    outFile.write('\n' + '\n')

        if step1Only:
            # as before, no summary line in step1Only mode
            continue
        print("wrote  %s  workflow%s  to  %s"
              % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
    return
364 
365 
def showWorkFlows(self, selected=None, extended=True):
    """Print the workflows in ``self.workFlows`` and a count per number of steps.

    selected -- optional iterable of workflow ids; only workflows whose
                ``numId`` is among them are listed.
    extended -- when true every command of a workflow is printed, otherwise
                only one line with its id and step names.

    Returns None.
    """
    if selected:
        # a real list: it is passed to len() and searched once per workflow
        selected = [float(s) for s in selected]
    maxLen = 100  # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of  %s  workflows:" % len(self.workFlows))
    if selected:
        print(" of which the following %s were selected:" % len(selected))
    # -ap for now:
    # NOTE(review): the listing lost its indentation; these overrides are read
    # as unconditional ("for now"), which makes the summary settings above
    # unused -- confirm against the original file.
    maxLen = -1  # for individual listing, no limit on width
    fmt1 = "%-6s %-35s [1]: %s "
    fmt2 = " %35s [%d]: %s"

    # N[k] counts the listed workflows that have k+1 commands
    N = []
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        nCmds = len(wf.cmds)
        # pad with zeros
        N.extend([0] * (nCmds - len(N)))
        if nCmds:
            # guard: with no commands N[-1] would hit the wrong bucket or raise
            N[nCmds - 1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        for i, s in enumerate(wf.cmds):
            if not extended:
                print("%-6s %-35s " % (wf.numId, stepNames))
                break
            # the appended blank is cut off again when maxLen is -1
            shown = (str(s) + ' ')[:maxLen]
            if i == 0:
                print(fmt1 % (wf.numId, stepNames, shown))
            else:
                print(fmt2 % (' ', i + 1, shown))
    print('')
    for i, n in enumerate(N):
        if n:
            print('%s workflows with %s steps' % (n, i + 1))

    return
401 
def createWorkFlows(self, fileNameIn):
    """Create WorkFlow objects for the workflows read from one matrix file.

    Only the entries of ``self.workFlowSteps`` whose key carries the prefix
    of ``fileNameIn`` are used, in ascending id order.  Each new
    '<num>_<name>' is recorded in ``self.nameList`` and a ``WorkFlow`` is
    appended to ``self.workFlows``.

    fileNameIn -- key of ``self.filesPrefMap``.

    Returns None.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys() if pref == prefixIn)

    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num) + '_' + name
        if nameId in self.nameList:
            print("==> duplicate name found for  %s" % nameId)
            print(' keeping :  %s' % (self.nameList[nameId],))
            print(' ignoring :  %s' % (val,))
            # the duplicate is announced as ignored, so it is not appended again
            continue
        self.nameList[nameId] = val
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
428 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    Files whose name does not contain ``self.what`` are skipped unless
    ``self.what`` is 'all'; with 'all' the files containing 'upgrade' are
    skipped.  For each remaining file ``readMatrix`` and then
    ``createWorkFlows`` are called.

    useInput, refRel, fromScratch -- passed through to ``readMatrix``.

    Returns None.  Any exception from the two calls is reported on stdout
    and re-raised.
    """
    for matrixFile in self.files:
        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % matrixFile)
            continue
        if self.what == 'all' and ('upgrade' in matrixFile):
            print("ignoring %s from default matrix" % matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % str(e))
            raise
450 
451 
def show(self, selected=None, extended=True):
    """Print the workflow listing followed by a separator line.

    selected, extended -- passed through to ``showWorkFlows``.

    Returns None.
    """
    self.showWorkFlows(selected, extended)
    # one pre-built string gives the same bytes on Python 2 and 3:
    # blank line, 80 dashes and a blank, then a blank line
    print('\n' + '-' * 80 + ' \n')
456 
457 
def updateDB(self):
    """Pickle ``self.workFlows`` into 'theMatrix.pkl' in the current directory.

    Returns None.
    """
    import pickle

    # binary mode is what pickle expects, and 'with' closes the file
    # even if dumping fails
    with open('theMatrix.pkl', 'wb') as pklFile:
        pickle.dump(self.workFlows, pklFile)

    return
464 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
tuple zip
Definition: archive.py:476
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77