CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
def __init__(self, msg):
    """Keep *msg* on the instance as the ``msg`` attribute."""
    self.msg = msg
def __str__(self):
    """Return the ``msg`` attribute unchanged."""
    return self.msg
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Initialise the reader from an options object.

    ``opt`` has to expose the attributes ``what``, ``wmcontrol``,
    ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite``
    and ``noRun``.  ``opt.what`` is handed to ``reset``; the others are
    copied onto the instance under the names listed below.
    """
    self.reset(opt.what)

    # (attribute set on self, attribute read from opt), in assignment order
    copied = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for target, source in copied:
        setattr(self, target, getattr(opt, source))
31 
def reset(self, what='all'):
    """Put the reader back into its empty state.

    Stores *what* and clears the collected step information, the list of
    workflow objects and the name registry.  Also (re)creates the list of
    known matrix files, the prefix associated with each of them, and sets
    ``relvalModule`` to ``None``.
    """
    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # (matrix file, prefix) pairs; the list order defines self.files.
    # 'relval_premix' / 'premix-' is disabled in the original as well.
    knownMatrices = (
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_extendedgen', 'genExt-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_upgrade', 'upg-'),
        ('relval_identity', 'id-'),
        ('relval_machine', 'mach-'),
        ('relval_unsch', 'unsch-'),
    )
    self.filesPrefMap = dict(knownMatrices)
    self.files = [matrixName for matrixName, _ in knownMatrices]

    self.relvalModule = None
73 
def makeCmd(self, step):
    """Split a step dictionary into ``(cfg, input, options)``.

    Keys containing ``'no_exec'`` are dropped.  The values stored under
    the keys ``cfg`` and ``input`` (compared case-insensitively) are
    returned separately.  Every other entry is appended to the option
    string as ``' <key> <value>'``, in the iteration order of *step*.
    """
    cfg = None
    inputInfo = None
    optionParts = []
    for key, value in step.items():
        if 'no_exec' in key:
            continue  # we want to really run it ...
        lowered = key.lower()
        if lowered == 'cfg':
            cfg = value  # returned separately, not part of the options
        elif lowered == 'input':
            inputInfo = value  # returned separately, not part of the options
        else:
            optionParts.append(' ' + key + ' ' + str(value))
    return cfg, inputInfo, ''.join(optionParts)
93 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and record the commands of its workflows.

    The module ``Configuration.PyReleaseValidation.<fileNameIn>`` is
    imported and kept in ``self.relvalModule``.  For every entry of its
    ``workflows`` mapping one record
    ``(num, name, commands, ranStepList)`` is stored in
    ``self.workFlowSteps`` under the key ``(num, prefix)``, where
    ``prefix`` comes from ``self.filesPrefMap``.

    Parameters:
      fileNameIn  -- key of ``self.filesPrefMap`` and name of the module.
      useInput    -- iterable of strings ``'<wf>'``, ``'<wf>:<level>'``,
                     ``'all'`` or ``'all:<level>'``; selects the workflows
                     whose first steps may be replaced by an ``...INPUT``
                     step, looking at step indices up to ``level``
                     (0 when no level is given).
      refRel      -- one release name, or a comma separated list with one
                     name per entry of the module's ``baseDataSetRelease``;
                     passed on to the module's ``changeRefRelease``.
      fromScratch -- iterable of workflow numbers removed again from the
                     ``useInput`` selection.

    Returns ``None``.  The method returns early, after printing a message,
    when the module cannot be imported or when the number of releases in
    ``refRel`` does not match ``baseDataSetRelease``.

    Raises ``MatrixException`` when a step yields both a cfg and an input.

    NOTE(review): the step lists and the ``addTo`` lists of the module's
    ``workflows`` are modified in place below, exactly as in the original.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing  %s" % (fileNameIn,))

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
        return

    print("request for INPUT for  %s" % (useInput,))

    # workflow number -> highest step index that may be taken from INPUT
    fromInput = {}
    if useInput:
        for item in useInput:
            if ':' in item:
                wfKey, level = item.split(':')
                level = int(level)
            else:
                wfKey, level = item, 0
            if wfKey == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(wfKey)] = level

    if fromScratch:
        for num in map(float, fromScratch):
            fromInput.pop(num, None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # previously a silent return; say why nothing was read
                print("ERROR: got %d reference releases, expected %d"
                      % (len(refRels), len(baseReleases)))
                return
            # a list, so that the pairs can be iterated more than once
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        self.relvalModule.changeRefRelease(self.relvalModule.steps, releasePairs)

    # steps that cannot be put in wm
    # ('HARVEST', 'HARVESTD', 'HARVESTDreHLT' are not excluded)
    skippedWithWm = ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        stepList = wfInfo[1]
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
        if len(wfInfo) >= 4:
            addTo = wfInfo[3]
            # pad with 0; '<' instead of '!=' so that a list that is already
            # longer than the step list cannot loop forever
            while len(addTo) < len(stepList):
                addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: walk the steps from the last
        # one down and use the first '<steps joined by __>INPUT' step found
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(len(stepList) - 1, -1, -1):
                if stepI > ilevel:
                    continue
                if stepI != 0:
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                else:
                    testName = stepList[stepI] + 'INPUT'
                if testName in self.relvalModule.steps.keys():
                    stepList[stepI] = testName
                    # drop the steps in front of the INPUT step
                    del stepList[:stepI]
                    break

        for step in stepList:
            stepName = step
            if self.wm and stepName in skippedWithWm:
                continue

            if len(name) > 0:
                name += '+'
            name += stepName

            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): 'merge' is not defined anywhere in the
                # visible listing (its line 230 is missing) -- confirm how
                # it is brought into scope in the real file.
                copyStep = merge(addCom + [self.relvalModule.steps[stepName]])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(self.relvalModule.steps[stepName])

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input description itself is stored, not a command string
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd += ' ' + self.addCommand
                    else:
                        cmd += ' ' + self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMIO', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            # counts only the steps that were not skipped above
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
267 
268 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None):
    """Write one ``cmsDriver_<name>_hlt.txt`` summary file per matrix file.

    For each entry of ``self.files`` the reader is reset with *what*, the
    matrix is read with ``readMatrix`` and, if any workflow was found, a
    text file is written in the current directory.  It holds one line per
    workflow (number, name, further steps, first command or input
    description), followed -- unless *step1Only* is true -- by ``STEPn``
    lines for the commands of the later steps.

    Parameters:
      useInput, refRel, fromScratch -- forwarded to ``readMatrix``.
      what       -- forwarded to ``reset``; matrix files whose name does
                    not contain it are ignored unless it is ``'all'``.
      step1Only  -- only write the workflow lines, without further steps,
                    and skip workflows whose first command is not a string.
      selected   -- optional iterable of workflow numbers to restrict to.

    Exceptions raised by ``readMatrix`` are reported and re-raised.
    """
    if selected:
        # a real list: it is used for a membership test per workflow
        selected = [float(s) for s in selected]
    # trick to skip the HImix IB test
    skippedNumbers = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'
        # 'with' closes the file on every path; the original left it open
        # when step1Only was set or when an exception occurred
        with open(dataFileName, 'w') as outFile:
            print("found  %s  workflows for  %s" % (len(self.workFlowSteps), dataFileName))
            # indexAndSteps[k] collects the (step name, command) pairs seen
            # at position k of the command lists; position 0 stays empty
            indexAndSteps = []

            writtenWF = 0
            for key in sorted(self.workFlowSteps):
                if selected and not (key[0] in selected):
                    continue
                if key[0] in skippedNumbers:
                    continue
                num, name, commands, _ = self.workFlowSteps[key]

                wfName, stepNames = name.split('+', 1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO', '')
                stepNames = stepNames.replace('+SKIMCOSD', '')
                stepNames = stepNames.replace('+SKIMD', '')
                # test for '+HARVEST' (not just 'HARVEST') so that index()
                # below cannot fail when a HARVEST step comes first
                if '+HARVEST' in stepNames:
                    # find out automatically what to remove
                    exactb = stepNames.index('+HARVEST')
                    if '+' in stepNames[exactb + 1:]:
                        exacte = stepNames.index('+', exactb + 1)
                    else:
                        exacte = len(stepNames)
                    stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                otherSteps = None
                if '+' in stepNames:
                    _, otherSteps = stepNames.split('+', 1)

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'
                # a first command that is not a string is treated as an
                # input description object
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i + 2:
                            indexAndSteps.append(set())
                        indexAndSteps[i + 1].add((c, commands[i + 1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: ' + inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: ' + str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:' + inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ ' + commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMIO', 'DQM')
                writtenWF += 1
                outFile.write(line + '\n')

            outFile.write('\n' + '\n')

            if not step1Only:
                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index + 1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line + '\n')
                    outFile.write('\n' + '\n')

        # as in the original, no summary is printed in step1Only mode
        if step1Only:
            continue
        print("wrote  %s  workflow%s  to  %s"
              % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
    return
368 
369 
def showWorkFlows(self, selected=None, extended=True):
    """Print an overview of the workflows held in ``self.workFlows``.

    Parameters:
      selected -- optional iterable of workflow numbers; when given, only
                  workflows whose ``numId`` is among them are listed.
      extended -- when true, every command of a workflow is printed on its
                  own line; otherwise one line per workflow is printed.

    A count of workflows per number of steps is printed at the end.
    """
    if selected:
        # a real list: len() and repeated membership tests are needed
        selected = [float(s) for s in selected]
    maxLen = 100  # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of  %s  workflows:" % (len(self.workFlows),))
    if selected:
        print(" of which the following %s were selected:" % (len(selected),))
    # -ap for now:
    # NOTE(review): the listing this was recovered from has lost its
    # indentation; the three overrides below are assumed to be
    # unconditional (which makes the values above unused) -- confirm.
    maxLen = -1  # for individual listing, no limit on width
    fmt1 = "%-6s %-35s [1]: %s "
    fmt2 = " %35s [%d]: %s"

    # N[k] counts the listed workflows that have k+1 commands
    N = []
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        # pad with zeros
        N.extend([0] * (len(wf.cmds) - len(N)))
        N[len(wf.cmds) - 1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        for i, s in enumerate(wf.cmds):
            if extended:
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, (str(s) + ' ')[:maxLen]))
                else:
                    print(fmt2 % (' ', i + 1, (str(s) + ' ')[:maxLen]))
            else:
                # short form: one line per workflow is enough
                print("%-6s %-35s " % (wf.numId, stepNames))
                break
    print('')
    for i, n in enumerate(N):
        if n:
            print("%s workflows with %s steps" % (n, i + 1))

    return
405 
def createWorkFlows(self, fileNameIn):
    """Create ``WorkFlow`` objects for the workflows read from one matrix file.

    Only the entries of ``self.workFlowSteps`` whose key carries the prefix
    associated with *fileNameIn* are used, in ascending order of workflow
    number.  Each one is registered in ``self.nameList`` under
    ``'<num>_<name>'`` (a message is printed when that name is already
    taken) and a ``WorkFlow`` is appended to ``self.workFlows``.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(wfNum for wfNum, pref in self.workFlowSteps if pref == prefixIn)

    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, _ = val
        nameId = str(num) + '_' + name
        if nameId in self.nameList:
            print("==> duplicate name found for  %s" % (nameId,))
            print(" keeping :  %s" % (self.nameList[nameId],))
            print(" ignoring :  %s" % (val,))
        else:
            self.nameList[nameId] = val

        # NOTE(review): placed at loop level as the listing suggests, so a
        # WorkFlow is appended even for a duplicate name -- confirm intent.
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
432 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    A matrix file is skipped when ``self.what`` is not ``'all'`` and is
    not contained in the file name, or when ``self.what`` is ``'all'`` and
    the file name contains ``'upgrade'``.  For the others ``readMatrix``
    (with *useInput*, *refRel* and *fromScratch*) and then
    ``createWorkFlows`` are called.  Exceptions from either call are
    reported on stdout and re-raised.
    """
    for matrixFile in self.files:
        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue
        if self.what == 'all' and ('upgrade' in matrixFile):
            print("ignoring %s from default matrix" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % (str(e),))
            raise
454 
455 
def show(self, selected=None, extended=True):
    """Print the workflow overview followed by a separator line.

    Both arguments are passed on to ``showWorkFlows``.
    """
    self.showWorkFlows(selected, extended)
    # same bytes as the former print statement: blank line, 80 dashes
    # followed by one space, blank line
    print('\n' + '-' * 80 + ' \n')
459  print '\n','-'*80,'\n'
460 
461 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory.

    The file is opened in binary mode, as pickle expects, and is closed
    as soon as the dump is complete.
    """
    import pickle

    with open('theMatrix.pkl', 'wb') as pklFile:
        pickle.dump(self.workFlows, pklFile)

    return
468 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
tuple zip
Definition: archive.py:476
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77