CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
def __init__(self, msg):
    """Keep ``msg`` on the instance as the ``msg`` attribute."""
    # NOTE(review): no base-class initialiser is called here; the class
    # header is not part of the visible listing, so confirm whether one is
    # expected.
    self.msg = msg
def __str__(self):
    """Return the stored ``msg`` attribute unchanged."""
    return self.msg
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Set up the reader from an options object.

    ``reset`` is called first with ``opt.what``; afterwards a fixed set of
    attributes is copied from ``opt`` onto the instance, some of them under
    a different name (see the table below).
    """
    self.reset(opt.what)

    # (attribute read from opt, attribute written on self), in copy order
    optionToAttribute = (
        ('wmcontrol', 'wm'),
        ('revertDqmio', 'revertDqmio'),
        ('command', 'addCommand'),
        ('apply', 'apply'),
        ('workflow', 'commandLineWf'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for optionName, attributeName in optionToAttribute:
        setattr(self, attributeName, getattr(opt, optionName))
31 
def reset(self, what='all'):
    """Put the reader back into its empty state.

    Stores ``what`` and replaces every bookkeeping container with a fresh,
    empty one. Also (re)creates the list of known matrix files and the map
    from matrix file name to its short prefix.
    """
    # (matrix file name, short prefix), in the order the files are listed
    knownMatrices = (
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_extendedgen', 'genExt-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_upgrade', 'upg-'),
        ('relval_identity', 'id-'),
        ('relval_machine', 'mach-'),
        ('relval_unsch', 'unsch-'),
    )

    self.what = what

    # per-workflow information gathered so far, not yet WorkFlow objects
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    self.filesPrefMap = dict(knownMatrices)
    self.files = [fileName for fileName, _ in knownMatrices]

    self.relvalModule = None
71 
def makeCmd(self, step):
    """Split one step's option mapping into ``(cfg, input, options)``.

    ``step`` is iterated with ``items()``. Keys are handled as follows:

    * a key containing ``'no_exec'`` is dropped entirely;
    * a key equal to ``'cfg'`` (case-insensitive) supplies the first
      element of the result and is not added to the option string;
    * a key equal to ``'input'`` (case-insensitive) supplies the second
      element and is not added to the option string either;
    * every other key contributes ``' ' + key + ' ' + str(value)`` to the
      option string, in iteration order.

    Returns a 3-tuple ``(cfg, input, options)``; ``cfg`` and ``input`` are
    ``None`` when the corresponding key is absent, ``options`` is ``''``
    when no key contributes.
    """
    cfg = None
    inputInfo = None  # not called `input`, which would shadow the builtin
    optionParts = []
    for key, value in step.items():
        if 'no_exec' in key:
            continue  # we want to really run it ...
        loweredKey = key.lower()
        if loweredKey == 'cfg':
            cfg = value  # returned separately, not part of the options
        elif loweredKey == 'input':
            inputInfo = value  # returned separately, not part of the options
        else:
            optionParts.append(' ' + key + ' ' + str(value))
    # join once instead of growing a string inside the loop
    return cfg, inputInfo, ''.join(optionParts)
91 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and record its workflows.

    The module ``Configuration.PyReleaseValidation.<fileNameIn>`` is
    imported and kept in ``self.relvalModule``. For every entry of its
    ``workflows`` mapping a tuple ``(num, name, commands, ranStepList)`` is
    stored in ``self.workFlowSteps`` under the key ``(num, prefix)``, where
    ``prefix`` is ``self.filesPrefMap[fileNameIn]``.

    Parameters:
      fileNameIn  -- key of ``self.filesPrefMap``; also the module name.
      useInput    -- iterable of strings ``'N'``, ``'N:L'``, ``'all'`` or
                     ``'all:L'``: workflow number (or all workflows) and an
                     optional integer level, default 0. For these workflows
                     an ``...INPUT`` step is looked up and, if present,
                     replaces the matching step and drops the ones before it.
      refRel      -- one release string, or a comma-separated list with as
                     many entries as ``relvalModule.baseDataSetRelease``;
                     passed to ``relvalModule.changeRefRelease``.
      fromScratch -- workflow numbers to remove again from the ``useInput``
                     selection.

    Returns None. It also returns early, without recording anything, when
    the import fails (after printing the error) or when a comma-separated
    ``refRel`` has the wrong number of entries (silently).

    Raises MatrixException when a step yields both a ``cfg`` and an
    ``input`` entry.
    """
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation was lost -- verify against the repository copy.

    prefix = self.filesPrefMap[fileNameIn]

    print "processing ", fileNameIn

    try:
        _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
        # __import__ with a dotted name returns the top-level package, so the
        # module itself is fetched from sys.modules
        self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
    except Exception, e:
        print "ERROR importing file ", fileNameIn, str(e)
        return

    print "request for INPUT for ", useInput


    # workflow number (float) -> deepest step index (int) that may be
    # replaced by an INPUT step
    fromInput={}

    if useInput:
        for i in useInput:
            if ':' in i:
                # NOTE(review): more than one ':' makes this unpacking raise
                # ValueError
                (ik,il)=i.split(':')
                if ik=='all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)]=int(il)
                else:
                    fromInput[float(ik)]=int(il)
            else:
                if i=='all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)]=0
                else:
                    fromInput[float(i)]=0

    if fromScratch:
        fromScratch=map(float,fromScratch)
        for num in fromScratch:
            if num in fromInput:
                fromInput.pop(num)
    #overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    #change the origin of dataset on the fly
    if refRel:
        if ',' in refRel:
            refRels=refRel.split(',')
            if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
                # NOTE(review): silent early return -- no workflow of this
                # file is recorded and nothing is reported to the caller
                return
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                zip(self.relvalModule.baseDataSetRelease,refRels)
                )
        else:
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
                )


    for num, wfInfo in self.relvalModule.workflows.items():
        commands=[]
        wfName = wfInfo[0]
        # NOTE(review): this is the module's own object, not a copy; the
        # item assignment and pop(0) further down modify it in place
        # (assuming wfInfo[1] is a plain list -- confirm)
        stepList = wfInfo[1]
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '': wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo=None
        addCom=None
        if len(wfInfo)>=3:
            addCom=wfInfo[2]
            if not type(addCom)==list: addCom=[addCom]
            #print 'added dict',addCom
            if len(wfInfo)>=4:
                addTo=wfInfo[3]
                #pad with 0
                # NOTE(review): never terminates if addTo is already longer
                # than stepList; also appends to the module's own object
                while len(addTo)!=len(stepList):
                    addTo.append(0)

        name=wfName
        stepIndex=0
        ranStepList=[]

        #first resolve INPUT possibilities
        if num in fromInput:
            ilevel=fromInput[num]
            #print num,ilevel
            # walk the steps from last to first so the deepest allowed
            # replacement wins
            for (stepIr,step) in enumerate(reversed(stepList)):
                stepName=step
                stepI=(len(stepList)-stepIr)-1
                #print stepIr,step,stepI,ilevel
                if stepI>ilevel:
                    #print "ignoring"
                    continue
                if stepI!=0:
                    testName='__'.join(stepList[0:stepI+1])+'INPUT'
                else:
                    testName=step+'INPUT'
                #print "JR",stepI,stepIr,testName,stepList
                if testName in self.relvalModule.steps.keys():
                    #print "JR",stepI,stepIr
                    stepList[stepI]=testName
                    #pop the rest in the list
                    #print "\tmod prepop",stepList
                    for p in range(stepI):
                        stepList.pop(0)
                    #print "\t\tmod",stepList
                    break


        for (stepI,step) in enumerate(stepList):
            stepName=step
            if self.wm:
                #cannot put a certain number of things in wm
                if stepName in [
                    #'HARVEST','HARVESTD','HARVESTDreHLT',
                    'RECODFROMRAWRECO','SKIMD','SKIMCOSD','SKIMDreHLT'
                    ]:
                    continue

            #replace stepName is needed
            #if stepName in self.replaceStep
            if len(name) > 0 : name += '+'
            #any step can be mirrored with INPUT
            ## maybe we want two-level-deep input
            # (the string below is a bare expression: disabled code, no effect)
            """
            if num in fromInput:
                if step+'INPUT' in self.relvalModule.steps.keys():
                    stepName = step+"INPUT"
                    stepList.remove(step)
                    stepList.insert(stepIndex,stepName)
            """
            name += stepName

            # stepIndex counts the steps actually kept so far; it differs
            # from stepI once a step has been skipped above
            if addCom and (not addTo or addTo[stepIndex]==1):
                # NOTE(review): `merge` is neither defined nor imported in the
                # visible listing, whose own numbering skips one line right
                # here; presumably an import was lost -- confirm against the
                # repository copy
                copyStep=merge(addCom+[self.relvalModule.steps[stepName]])
                cfg, input, opts = self.makeCmd(copyStep)
            else:
                cfg, input, opts = self.makeCmd(self.relvalModule.steps[stepName])

            if input and cfg :
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if input:
                # the input object itself is stored in place of a command string
                cmd = input
                if self.noRun:
                    cmd.run=[]
            else:
                if cfg:
                    cmd = 'cmsDriver.py '+cfg+' '+opts
                else:
                    cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                if self.wm:
                    cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
                if self.addCommand:
                    if self.apply:
                        # extra command only for the listed step indices/names
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd +=' '+self.addCommand
                    else:
                        cmd +=' '+self.addCommand
                if self.wm and self.revertDqmio=='yes':
                    cmd=cmd.replace('DQMIO','DQM')
                    cmd=cmd.replace('--filetype DQM','')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex+=1

        self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)

    return
265 
266 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write one ``cmsDriver_<name>_hlt.txt`` summary file per matrix file.

    For every entry of ``self.files`` the reader is reset with ``what``,
    the matrix is read with ``readMatrix`` and, if any workflow was
    recorded, a text file named after the matrix file (``relval_`` replaced
    by ``cmsDriver_``, ``_hlt.txt`` appended) is written in the current
    directory. It holds one line per workflow, then, unless ``step1Only``
    is set, the commands of the later steps grouped by step position.

    Parameters:
      useInput, refRel, fromScratch -- forwarded unchanged to ``readMatrix``.
      what       -- passed to ``reset``; files whose name does not contain
                    it are skipped unless it is ``'all'``.
      step1Only  -- only list the first step; workflows whose first command
                    is not a string are then left out.
      selected   -- optional workflow numbers (converted with ``float``);
                    when given, other workflows are left out.

    Returns None. Exceptions from ``readMatrix`` are printed and re-raised.
    """
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation was lost -- verify against the repository copy.

    if selected:
        selected=map(float,selected)
    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print "ignoring non-requested file",matrixFile
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception, e:
            print "ERROR reading file:", matrixFile, str(e)
            raise

        if not self.workFlowSteps: continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
        # NOTE(review): closed only by the explicit close() at the end of
        # this iteration; an exception, or the step1Only `continue` below,
        # bypasses it (as far as the reconstructed nesting shows)
        outFile = open(dataFileName,'w')

        print "found ", len(self.workFlowSteps.keys()), ' workflows for ', dataFileName
        ids = self.workFlowSteps.keys()
        ids.sort()
        # position i holds a set of (step name, command) pairs seen at
        # that step position across all workflows; position 0 stays empty
        indexAndSteps=[]

        writtenWF=0
        for key in ids:
            if selected and not (key[0] in selected):
                continue
            #trick to skip the HImix IB test
            if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
            num, name, commands, stepList = self.workFlowSteps[key]

            # name is '<workflow name>+<step1>+<step2>...'
            wfName,stepNames= name.split('+',1)

            stepNames=stepNames.replace('+RECODFROMRAWRECO','')
            stepNames=stepNames.replace('+SKIMCOSD','')
            stepNames=stepNames.replace('+SKIMD','')
            if 'HARVEST' in stepNames:
                #find out automatically what to remove
                # NOTE(review): index() raises ValueError when the HARVEST
                # step comes first, i.e. has no leading '+'
                exactb=stepNames.index('+HARVEST')
                exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
                stepNames=stepNames.replace(stepNames[exactb:exacte],'')
            otherSteps = None
            if '+' in stepNames:
                # step1 is not used afterwards
                step1,otherSteps = stepNames.split('+',1)

            line = str(num) + ' ++ '+ wfName
            if otherSteps and not step1Only:
                line += ' ++ ' +otherSteps.replace('+',',')
            else:
                line += ' ++ none'
            inputInfo=None
            # a first command that is not a string is taken to be the input
            # description object
            if not isinstance(commands[0],str):
                inputInfo=commands[0]
            if otherSteps:
                for (i,c) in enumerate(otherSteps.split('+')):
                    #pad with set
                    for p in range(len(indexAndSteps),i+2):
                        indexAndSteps.append(set())
                    # NOTE(review): pairs the i-th remaining step name with
                    # commands[i+1]; presumably still aligned after the
                    # removals above -- verify
                    indexAndSteps[i+1].add((c,commands[i+1]))

            if inputInfo :
                #skip the samples from INPUT when step1Only is on
                if step1Only: continue
                line += ' ++ REALDATA: '+inputInfo.dataSet
                if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
                line += ', FILES: ' +str(inputInfo.files)
                line += ', EVENTS: '+str(inputInfo.events)
                if inputInfo.label!='':
                    line += ', LABEL: ' +inputInfo.label
                line += ', LOCATION:'+inputInfo.location
                line += ' @@@'
            else:
                line += ' @@@ '+commands[0]
            if self.revertDqmio=='yes':
                line=line.replace('DQMIO','DQM')
            writtenWF+=1
            outFile.write(line+'\n')


        outFile.write('\n'+'\n')
        if step1Only: continue

        for (index,s) in enumerate(indexAndSteps):
            for (stepName,cmd) in s:
                stepIndex=index+1
                if 'dasquery.log' in cmd: continue
                line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
                if self.revertDqmio=='yes':
                    line=line.replace('DQMIO','DQM')
                outFile.write(line+'\n')
            outFile.write('\n'+'\n')
        outFile.close()
        print "wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name
    return
366 
367 
def showWorkFlows(self, selected=None, extended=True):
    """Print the collected workflows and a count per number of steps.

    Parameters:
      selected -- optional workflow numbers (converted with ``float``);
                  when given, only workflows whose ``numId`` is among them
                  are printed and counted.
      extended -- when true, print every command of a workflow, preceded by
                  an empty line; otherwise print one line per workflow with
                  its number and step names.

    Returns None. Reads ``numId``, ``nameId`` and ``cmds`` of each object in
    ``self.workFlows``.
    """
    if selected: selected = map(float,selected)
    maxLen = 100 # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print "\nfound a total of ", len(self.workFlows), ' workflows:'
    if selected:
        print " of which the following", len(selected), 'were selected:'
    #-ap for now:
    # NOTE(review): the listing this was taken from lost its indentation;
    # the three overrides below are placed at function level, which makes
    # the summary values above dead -- confirm they are not meant to sit
    # inside `if selected:`
    maxLen = -1 # for individual listing, no limit on width
    fmt1 = "%-6s %-35s [1]: %s "
    fmt2 = " %35s [%d]: %s"

    # N[k] counts the printed workflows that have k+1 commands
    N=[]
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected: continue
        if extended: print ''
        #pad with zeros
        for i in range(len(N),len(wf.cmds)): N.append(0)
        N[len(wf.cmds)-1]+=1
        wfName, stepNames = wf.nameId.split('+',1)
        for i,s in enumerate(wf.cmds):
            if extended:
                # with maxLen == -1 the slice only removes the blank that
                # was just appended
                if i==0:
                    print fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen])
                else:
                    print fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen])
            else:
                # short form: one line per workflow, then stop
                print "%-6s %-35s "% (wf.numId, stepNames)
                break
    print ''
    for i,n in enumerate(N):
        if n: print n,'workflows with',i+1,'steps'

    return
403 
def createWorkFlows(self, fileNameIn):
    """Turn the recorded steps of one matrix file into WorkFlow objects.

    Only the entries of ``self.workFlowSteps`` whose key carries the prefix
    ``self.filesPrefMap[fileNameIn]`` are used, in ascending order of
    workflow number. Each is registered in ``self.nameList`` under
    ``'<num>_<name>'`` (a name already present is reported and the earlier
    entry is kept) and a ``WorkFlow(num, name, commands=commands)`` is
    appended to ``self.workFlows``.

    Returns None.
    """

    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    keyList = self.workFlowSteps.keys()
    ids = []
    for item in keyList:
        id, pref = item
        if pref != prefixIn : continue
        ids.append(id)
    ids.sort()
    for key in ids:
        val = self.workFlowSteps[(key,prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num)+'_'+name
        if nameId in self.nameList:
            print "==> duplicate name found for ", nameId
            print ' keeping : ', self.nameList[nameId]
            print ' ignoring : ', val
        else:
            self.nameList[nameId] = val

        # NOTE(review): the listing this was taken from lost its
        # indentation; placed at loop level here, so a workflow reported
        # as "ignoring" above is still appended -- confirm against the
        # repository copy
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
430 
431  def prepare(self, useInput=None, refRel='', fromScratch=None):
432 
433  for matrixFile in self.files:
434  if self.what != 'all' and self.what not in matrixFile:
435  print "ignoring non-requested file",matrixFile
436  continue
437  if self.what == 'all' and ('upgrade' in matrixFile):
438  print "ignoring",matrixFile,"from default matrix"
439  continue
440 
441  try:
442  self.readMatrix(matrixFile, useInput, refRel, fromScratch)
443  except Exception, e:
444  print "ERROR reading file:", matrixFile, str(e)
445  raise
446 
447  try:
448  self.createWorkFlows(matrixFile)
449  except Exception, e:
450  print "ERROR creating workflows :", str(e)
451  raise
452 
453 
454  def show(self, selected=None, extended=True):
455 
456  self.showWorkFlows(selected,extended)
457  print '\n','-'*80,'\n'
458 
459 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory.

    An existing file of that name is overwritten. Returns None; errors from
    ``open`` or ``pickle.dump`` propagate to the caller.
    """
    import pickle

    # Binary mode is the documented way to write pickles; the handle is
    # closed explicitly so the data is flushed even if dump() raises.
    pickleFile = open('theMatrix.pkl', 'wb')
    try:
        pickle.dump(self.workFlows, pickleFile)
    finally:
        pickleFile.close()

    return
466 
Definition: merge.py:1
revertDqmio
maybe we want two-level-deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
tuple zip
Definition: archive.py:476
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77