CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
def __init__(self, msg):
    """Keep the error text so that ``__str__`` can report it later."""
    self.msg = msg
def __str__(self):
    """Return the stored message as text.

    The message is passed through ``str`` so that a non-string payload
    (a number, another exception object, ...) does not make ``str(exc)``
    fail with "__str__ returned non-string".
    """
    return str(self.msg)
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Set up the reader from an options object.

    ``opt`` has to provide the attributes ``what``, ``wmcontrol``,
    ``command``, ``apply``, ``workflow``, ``overWrite`` and ``noRun``.
    """
    # start from an empty state for the requested selection of matrix files
    self.reset(opt.what)

    # (attribute set on the reader, attribute read from the options)
    copied = (
        ('wm', 'wmcontrol'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for target, source in copied:
        setattr(self, target, getattr(opt, source))
30 
def reset(self, what='all'):
    """Forget everything read so far and restore the default file tables.

    ``what`` is stored in ``self.what``; the other attributes are set to
    fresh empty containers or to the built-in list of matrix files.
    """
    self.what = what

    # per-workflow information, but not yet the WorkFlow objects
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # (matrix file name, prefix) pairs, in the order the files are processed
    knownMatrices = (
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_upgrade', 'upg-'),
        ('relval_identity', 'id-'),
    )
    self.filesPrefMap = dict(knownMatrices)
    self.files = [fileName for fileName, _ in knownMatrices]

    self.relvalModule = None
64 
def makeCmd(self, step):
    """Turn one step dictionary into ``(cfg, input, options)``.

    Keys containing ``no_exec`` are skipped.  The values stored under the
    keys ``cfg`` and ``input`` (compared case-insensitively) are returned
    separately; every other entry contributes `` <key> <value>`` to the
    option string, in the iteration order of ``step``.
    """
    cfgValue = None
    inputValue = None
    fragments = []
    for key, value in step.items():
        if 'no_exec' in key:
            # dropped on purpose: the step is meant to really run
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            cfgValue = value
        elif lowered == 'input':
            inputValue = value
        else:
            fragments.append(' ' + key + ' ' + str(value))
    return cfgValue, inputValue, ''.join(fragments)
84 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one matrix module and record the commands of its workflows.

    Parameters:
      fileNameIn  -- module name below ``Configuration.PyReleaseValidation``;
                     must be a key of ``self.filesPrefMap``.
      useInput    -- optional list of strings ``'<id>'`` or ``'<id>:<level>'``
                     (``<id>`` may be ``'all'``).  For these workflows the
                     leading steps are replaced by a matching ``...INPUT``
                     step when one exists; ``<level>`` (default 0) is the
                     highest step index tried.
      refRel      -- optional release name, or comma-separated names (one per
                     entry of the module's ``baseDataSetRelease``), handed to
                     the module's ``changeRefRelease``.
      fromScratch -- optional list of workflow ids to remove again from the
                     ``useInput`` selection.

    For every workflow of the module
    ``self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)``
    is stored.  Nothing is returned.  An import failure, or a ``refRel`` list
    of the wrong length, is reported on stdout and ends the call early.

    Raises MatrixException when a step provides both ``cfg`` and ``input``.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing  %s" % (fileNameIn,))

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, e))
        return
    relval = self.relvalModule

    print("request for INPUT for  %s" % (useInput,))

    # workflow id -> highest step index that may be replaced by an INPUT step
    fromInput = {}
    for request in (useInput or []):
        if ':' in request:
            wfId, level = request.split(':')
            level = int(level)
        else:
            wfId, level = request, 0
        if wfId == 'all':
            for k in relval.workflows.keys():
                fromInput[float(k)] = level
        else:
            fromInput[float(wfId)] = level

    for num in [float(x) for x in (fromScratch or [])]:
        fromInput.pop(num, None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            relval.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = relval.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # used to return without a word; say why nothing is read
                print("ERROR: got %d reference releases for %d base releases, not reading %s"
                      % (len(refRels), len(baseReleases), fileNameIn))
                return
            # a real list: the callee may walk over the pairs more than once
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        relval.changeRefRelease(relval.steps, releasePairs)

    # steps that cannot be put in wm
    wmExcluded = ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')
    # the set of step names does not change below, so look it up only once
    knownSteps = frozenset(relval.steps.keys())

    for num, wfInfo in relval.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # work on a copy: the INPUT handling below rewrites the list and must
        # not alter the definition held by the imported module
        stepList = list(wfInfo[1])
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]

        # option to specialize the wf as the third item in the WF list
        addCom = None
        addTo = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
        if len(wfInfo) >= 4:
            # copy and pad with 0; a list already longer than stepList is
            # left alone (the former "while !=" loop never ended for it)
            addTo = list(wfInfo[3])
            addTo.extend([0] * (len(stepList) - len(addTo)))

        # first resolve INPUT possibilities, deepest allowed level first
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                testName = '__'.join(stepList[:stepI + 1]) + 'INPUT'
                if testName in knownSteps:
                    stepList[stepI] = testName
                    # the steps before it are provided by the input
                    del stepList[:stepI]
                    break

        name = wfName
        stepIndex = 0
        ranStepList = []
        for stepName in stepList:
            if self.wm and stepName in wmExcluded:
                continue

            if len(name) > 0:
                name += '+'
            name += stepName

            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): the listing has a gap exactly here; this
                # import is restored from the upstream file as remembered,
                # so confirm the module path
                from Configuration.PyReleaseValidation.relval_steps import merge
                copyStep = merge(addCom + [relval.steps[stepName]])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(relval.steps[stepName])

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input description itself takes the place of a command
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    # without self.apply the extra command goes to every
                    # step, otherwise only to the listed indices or names
                    if not self.apply or stepIndex in self.apply or stepName in self.apply:
                        cmd += ' ' + self.addCommand
                if self.wm:
                    cmd = cmd.replace('DQMROOT', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
258 
259 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write one ``cmsDriver_<name>_hlt.txt`` summary per matrix file.

    For every entry of ``self.files`` the reader is reset with ``what``, the
    matrix is read with ``readMatrix(matrixFile, useInput, refRel,
    fromScratch)`` and, when workflows were found, a text file is written to
    the current directory.  It holds one line per workflow and, unless
    ``step1Only`` is set, one ``STEPn`` line per distinct later step.

    ``selected`` optionally restricts the output to the given workflow ids.
    Exceptions from ``readMatrix`` are reported on stdout and re-raised.
    """
    if selected:
        selected = [float(s) for s in selected]
    # trick to skip the HImix IB test
    skippedIds = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, e))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
        print("found  %s  workflows for  %s" % (len(self.workFlowSteps), dataFileName))

        # indexAndSteps[i] collects the distinct (step name, command) pairs
        # seen at step index i over all workflows written
        indexAndSteps = []
        writtenWF = 0

        # the context manager closes the file on every path, including the
        # step1Only shortcut and errors raised while formatting a line
        with open(dataFileName, 'w') as outFile:
            for key in sorted(self.workFlowSteps.keys()):
                if selected and key[0] not in selected:
                    continue
                if key[0] in skippedIds:
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]

                wfName, stepNames = name.split('+', 1)

                for dropped in ('+RECODFROMRAWRECO', '+SKIMCOSD', '+SKIMD'):
                    stepNames = stepNames.replace(dropped, '')
                if '+HARVEST' in stepNames:
                    # find out automatically what to remove: the text from
                    # '+HARVEST' up to the next '+' (or the end).  Testing
                    # for '+HARVEST' rather than 'HARVEST' keeps index()
                    # from raising when there is no leading '+'.
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.find('+', exactb + 1)
                    if exacte < 0:
                        exacte = len(stepNames)
                    stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'

                # a first "command" that is not a string is an input description
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]

                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with empty sets up to index i+1
                        while len(indexAndSteps) < i + 2:
                            indexAndSteps.append(set())
                        indexAndSteps[i + 1].add((c, commands[i + 1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: ' + inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: ' + str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:' + inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ ' + commands[0]
                line = line.replace('DQMROOT', 'DQM')
                writtenWF += 1
                outFile.write(line + '\n')

            outFile.write('\n' + '\n')

            if not step1Only:
                for (index, stepSet) in enumerate(indexAndSteps):
                    for (stepName, cmd) in stepSet:
                        if 'dbsquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (index + 1,) + stepName + ' @@@ ' + cmd
                        line = line.replace('DQMROOT', 'DQM')
                        outFile.write(line + '\n')
                    # NOTE(review): indentation is lost in the listing; the
                    # blank separator is assumed to follow each step index
                    outFile.write('\n' + '\n')

        # as before, no summary line is printed in step1Only mode
        if step1Only:
            continue
        print("wrote  %s  workflow%s  to  %s"
              % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
    return
357 
358 
def showWorkFlows(self, selected=None, extended=True):
    """Print the workflows held in ``self.workFlows``.

    ``selected`` optionally restricts the listing to the given workflow ids
    (compared as floats).  With ``extended`` every command of a workflow is
    printed, otherwise one short line per workflow.  A count of workflows
    per number of steps is printed at the end.  Nothing is returned.
    """
    if selected:
        selected = [float(s) for s in selected]

    print("\nfound a total of  %s  workflows:" % (len(self.workFlows),))
    if selected:
        print(" of which the following %s were selected:" % (len(selected),))

    # NOTE(review): the listing first sets a summary format limited to 100
    # characters and then these values under the comment "-ap for now".
    # Indentation is lost, so the override is assumed to be unconditional;
    # confirm that it is not meant to apply only when ``selected`` is given.
    maxLen = -1 # for individual listing, no limit on width
    fmt1 = "%-6s %-35s [1]: %s "
    fmt2 = " %35s [%d]: %s"

    # stepCounts[k] is the number of listed workflows that have k+1 commands
    stepCounts = []
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        nCmds = len(wf.cmds)
        if nCmds:
            # pad with zeros; a workflow without commands is not counted
            # (index -1 used to fail or hit the wrong bucket)
            stepCounts.extend([0] * (nCmds - len(stepCounts)))
            stepCounts[nCmds - 1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        if extended:
            for i, s in enumerate(wf.cmds):
                shown = (str(s) + ' ')[:maxLen]
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, shown))
                else:
                    print(fmt2 % (' ', i + 1, shown))
        elif wf.cmds:
            print("%-6s %-35s " % (wf.numId, stepNames))
    print('')
    for i, n in enumerate(stepCounts):
        if n:
            print("%s workflows with %s steps" % (n, i + 1))

    return
394 
def createWorkFlows(self, fileNameIn):
    """Create WorkFlow objects for the workflows read from ``fileNameIn``.

    Only the entries of ``self.workFlowSteps`` stored under the prefix of
    ``fileNameIn`` are used, in increasing order of workflow number.  Each
    name seen for the first time is remembered in ``self.nameList``; a
    repeated name is reported on stdout.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # go through the list of items and keep the requested workflows only
    wantedIds = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys()
                       if pref == prefixIn)

    for wfId in wantedIds:
        entry = self.workFlowSteps[(wfId, prefixIn)]
        num, name, commands, stepList = entry
        nameId = str(num) + '_' + name
        if nameId not in self.nameList:
            self.nameList[nameId] = entry
        else:
            print("==> duplicate name found for  %s" % (nameId,))
            print(" keeping :  %s" % (self.nameList[nameId],))
            print(" ignoring :  %s" % (entry,))

        # NOTE(review): indentation is lost in the listing; going by the
        # blank line before it, this append is taken to run for every entry,
        # duplicates included, although the message above says "ignoring"
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
421 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    A file is skipped when ``self.what`` names a selection it does not
    match, and the ``upgrade`` file is skipped when ``self.what`` is
    ``'all'``.  Errors from reading or from workflow creation are reported
    on stdout and re-raised.
    """
    wantAll = (self.what == 'all')
    for matrixFile in self.files:
        if not wantAll and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue
        if wantAll and 'upgrade' in matrixFile:
            print("ignoring %s from default matrix" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, e))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % (e,))
            raise
443 
444 
def show(self, selected=None, extended=True):
    """Print the workflow listing followed by a separator line of 80 dashes."""
    self.showWorkFlows(selected, extended)
    separator = '-' * 80
    # one string that reproduces what the three-item print statement emitted
    print('\n' + separator + ' \n')
449 
450 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
    import pickle

    # Binary mode is what pickle expects (text mode fails on Python 3), and
    # the context manager closes the file, which the bare open() never did.
    with open('theMatrix.pkl', 'wb') as pickleFile:
        pickle.dump(self.workFlows, pickleFile)

    return
457 
Definition: merge.py:1
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
dictionary map
Definition: Association.py:205
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77