CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 
2 import sys
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 
6 # ================================================================================
7 
def __init__(self, msg):
    """Remember the text that describes the failure.

    msg -- message kept on the instance as ``msg``; ``__str__`` returns it.

    NOTE(review): the class header is not visible in this excerpt; this is
    presumably the initializer of the ``MatrixException`` raised by
    ``readMatrix`` -- confirm.
    """
    self.msg = msg
def __str__(self):
    """Return the message stored on the instance as its printable form."""
    text = self.msg
    return text
13 
14 # ================================================================================
15 
17 
def __init__(self, opt):
    """Initialise the reader from an options object.

    opt -- object providing the attributes ``what``, ``wmcontrol``,
           ``command``, ``workflow`` and ``overWrite``.

    ``opt.what`` is handed to ``reset``; the other four attributes are copied
    onto the instance as ``wm``, ``addCommand``, ``commandLineWf`` and
    ``overWrite``.
    """
    # reset() creates the (empty) containers and records which matrix is wanted
    self.reset(opt.what)

    self.wm, self.addCommand = opt.wmcontrol, opt.command
    self.commandLineWf, self.overWrite = opt.workflow, opt.overWrite
28 
def reset(self, what='all'):
    """Put the reader back into its empty state.

    what -- stored as ``self.what``; ``showRaw`` and ``prepare`` compare it
            against the matrix file names ('all' selects every file).

    Re-creates the result containers, the table of known matrix files with
    their prefixes, and clears the currently loaded relval module.
    """
    self.what = what

    # raw per-workflow information; not yet WorkFlow objects
    self.workFlowSteps = dict()
    # the actual WorkFlow objects
    self.workFlows = list()
    self.nameList = dict()

    # (matrix file, prefix) pairs, in the order the files are processed
    prefixTable = (
        ('relval_standard', 'std-'),
        ('relval_highstats', 'hi-'),
        ('relval_pileup', 'PU-'),
        ('relval_generator', 'gen-'),
        ('relval_production', 'prod-'),
        ('relval_ged', 'ged-'),
        ('relval_identity', 'id-'),
    )
    self.filesPrefMap = dict(prefixTable)
    self.files = [matrixFile for matrixFile, _ in prefixTable]

    self.relvalModule = None
60 
def makeCmd(self, step):
    """Turn one step dictionary into its command-line pieces.

    step -- mapping of option name to value.

    Returns the tuple ``(cfg, input, options)``:
      * cfg     -- value stored under the key 'cfg' (any letter case), or None
      * input   -- value stored under the key 'input' (any letter case), or None
      * options -- every other entry rendered as ' <key> <value>' and
                   concatenated in the mapping's iteration order

    Entries whose key contains 'no_exec' are dropped.
    """
    cfgValue = None
    inputValue = None
    optionParts = []
    for key, value in step.items():
        if 'no_exec' in key:
            # we want to really run it, so the flag never reaches the command
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            # handed back separately, not part of the option string
            cfgValue = value
        elif lowered == 'input':
            # handed back separately, not part of the option string
            inputValue = value
        else:
            optionParts.append(' ' + key + ' ' + str(value))
    return cfgValue, inputValue, ''.join(optionParts)
80 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and record the commands of its workflows.

    fileNameIn  -- name of the matrix module inside
                   ``Configuration.PyReleaseValidation``; must be a key of
                   ``self.filesPrefMap``.
    useInput    -- optional list of strings ``'<wf>'`` or ``'<wf>:<level>'``
                   (``<wf>`` may be ``'all'``) naming the workflows whose first
                   steps may be replaced by an ``...INPUT`` step; ``<level>``
                   is the highest step index that may be replaced (default 0).
    refRel      -- optional reference release, or a comma separated list with
                   one entry per element of the module's ``baseDataSetRelease``;
                   forwarded to the module's ``changeRefRelease``.
    fromScratch -- optional list of workflow numbers that are removed again
                   from the ``useInput`` selection.

    For every workflow of the module an entry
    ``self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)``
    is written.  A command is either a ``cmsDriver.py`` command line (str) or
    the object found under the step's 'input' key.

    The step lists and ``addTo`` lists of the module are copied before they
    are edited, so the imported module is left untouched and can be read again
    with other options.

    Returns None.  An import failure or a mismatching number of reference
    releases is reported on stdout and ends the call early.
    Raises MatrixException when a step defines both 'cfg' and 'input'.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing  %s" % (fileNameIn,))

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        # best effort: an unreadable matrix file is reported and skipped
        print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
        return

    print("request for INPUT for  %s" % (useInput,))

    # workflow number -> highest step index that may be replaced by an INPUT step
    fromInput = {}
    if useInput:
        for item in useInput:
            if ':' in item:
                wfId, level = item.split(':')
                level = int(level)
            else:
                wfId, level = item, 0
            if wfId == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(wfId)] = level

    if fromScratch:
        for scratchNum in fromScratch:
            fromInput.pop(float(scratchNum), None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # used to be a silent return that left the reader empty
                print("ERROR: got %d reference releases but %d are expected, nothing read from %s"
                      % (len(refRels), len(baseReleases), fileNameIn))
                return
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        self.relvalModule.changeRefRelease(self.relvalModule.steps, releasePairs)

    steps = self.relvalModule.steps
    knownSteps = set(steps.keys())
    # cannot put a certain number of things in wm
    wmSkipped = ('HARVEST', 'HARVESTD', 'HARVESTDreHLT', 'RECODFROMRAWRECO',
                 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # copy: the list is edited below and must not leak back into the module
        stepList = list(wfInfo[1])
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
            if len(wfInfo) >= 4:
                # pad with 0 up to the number of steps; a list that is already
                # longer is left alone (the old "!=" loop never ended then)
                addTo = list(wfInfo[3])
                addTo.extend([0] * (len(stepList) - len(addTo)))

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: starting from the deepest allowed
        # step, look for a step called '<step1>__..__<stepN>INPUT'; if it
        # exists it replaces step N and all steps before it are dropped
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                testName = '__'.join(stepList[:stepI + 1]) + 'INPUT'
                if testName in knownSteps:
                    stepList[stepI] = testName
                    del stepList[:stepI]
                    break

        for stepName in stepList:
            if self.wm and stepName in wmSkipped:
                continue

            if name:
                name += '+'
            name += stepName

            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): the embedded line numbering of this excerpt
                # skips the line right before the merge() call; merge is
                # assumed to come from relval_steps -- confirm.
                from Configuration.PyReleaseValidation.relval_steps import merge
                copyStep = merge(addCom + [steps[stepName]])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(steps[stepName])

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow " + str(num) + ' step ' + stepName
                raise MatrixException(msg)

            if inputInfo:
                # kept as the object itself, showRaw reads its attributes
                cmd = inputInfo
            else:
                # NOTE(review): the string edits below are taken to belong to
                # this branch only, since an input entry is not a string.
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    cmd += ' ' + self.addCommand
                if self.wm:
                    cmd = cmd.replace('DQMROOT', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
245 
246 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None):
    """Dump the workflows of every requested matrix file into a text file.

    useInput, refRel, fromScratch -- forwarded unchanged to ``readMatrix``.
    what      -- passed to ``reset``; files whose name does not contain it are
                 skipped unless it is 'all'.
    step1Only -- when true only the first step of each workflow is written and
                 workflows that start from an input object are left out.
    selected  -- optional list of workflow numbers (anything ``float`` accepts);
                 when given, only these workflows are written.

    For a matrix file ``relval_X`` the file ``cmsDriver_X_hlt.txt`` is created
    in the current directory.  It holds one line per workflow
    (``<num> ++ <name> ++ <other steps|none> ... @@@ <step1 command>``),
    followed, unless ``step1Only`` is set, by ``STEP<n> ++ <step> @@@ <command>``
    lines for the later steps.

    Returns None; errors from ``readMatrix`` are reported on stdout and
    re-raised.
    """
    if selected:
        # a real list: it is searched once per workflow
        selected = [float(s) for s in selected]

    # trick to skip the HImix IB test
    skippedIds = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'
        # "with" closes the file on every path, including step1Only and errors
        with open(dataFileName, 'w') as outFile:

            print("found  %d  workflows for  %s" % (len(self.workFlowSteps), dataFileName))
            # indexAndSteps[i] collects the (stepName, command) pairs of step i+1
            indexAndSteps = []

            writtenWF = 0
            for key in sorted(self.workFlowSteps.keys()):
                if selected and key[0] not in selected:
                    continue
                if key[0] in skippedIds:
                    continue
                num, name, commands, _ = self.workFlowSteps[key]

                wfName, stepNames = name.split('+', 1)

                for dropped in ('+RECODFROMRAWRECO', '+SKIMCOSD', '+SKIMD'):
                    stepNames = stepNames.replace(dropped, '')
                # find out automatically which harvesting step to remove; only
                # a '+HARVEST...' entry can be located (a bare leading
                # 'HARVEST' used to raise ValueError here)
                if '+HARVEST' in stepNames:
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.find('+', exactb + 1)
                    if exacte < 0:
                        exacte = len(stepNames)
                    stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'
                # a first command that is not a string is an input description
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i + 2:
                            indexAndSteps.append(set())
                        indexAndSteps[i + 1].add((c, commands[i + 1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: ' + inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: ' + str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:' + inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ ' + commands[0]
                line = line.replace('DQMROOT', 'DQM')
                writtenWF += 1
                outFile.write(line + '\n')

            outFile.write('\n' + '\n')

            if not step1Only:
                for (index, s) in enumerate(indexAndSteps):
                    # sorted so that the file content is reproducible
                    for (stepName, cmd) in sorted(s):
                        stepIndex = index + 1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                        line = line.replace('DQMROOT', 'DQM')
                        outFile.write(line + '\n')
                    # NOTE(review): one blank block per step index is assumed;
                    # the nesting of this write is not recoverable from the
                    # excerpt -- confirm.
                    outFile.write('\n' + '\n')

        if step1Only:
            # as before, no summary line in step1Only mode
            continue
        print("wrote  %d  workflow%s  to  %s"
              % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
    return
344 
345 
def showWorkFlows(self, selected=None, extended=True):
    """Print the workflows held in ``self.workFlows``.

    selected -- optional list of workflow numbers (anything ``float``
                accepts); when given, only these workflows are listed.
    extended -- when true every command of a workflow is printed, otherwise
                one line with number and step names per workflow.

    Each workflow object is read through its ``numId``, ``nameId`` and
    ``cmds`` attributes.  After the listing, the number of listed workflows
    per step count is printed.  Returns None.
    """
    if selected:
        # a real list: its length is printed and it is searched repeatedly
        selected = [float(s) for s in selected]

    print("\nfound a total of  %d  workflows:" % (len(self.workFlows),))
    if selected:
        print(" of which the following %d were selected:" % (len(selected),))

    # NOTE(review): the original first set a truncating summary layout
    # (maxLen = 100, formats ending in "...") and then replaced it under the
    # comment "-ap for now".  Only the effective values are kept here; the
    # replacement is assumed to be unconditional -- confirm.
    maxLen = -1  # for individual listing, no limit on width
    fmt1 = "%-6s %-35s [1]: %s "
    fmt2 = " %35s [%d]: %s"

    # stepCounts[k] = number of listed workflows with k+1 commands
    stepCounts = []
    for wf in self.workFlows:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        nCmds = len(wf.cmds)
        # pad with zeros
        while len(stepCounts) < nCmds:
            stepCounts.append(0)
        if nCmds:
            # a workflow without commands has no bucket to be counted in
            stepCounts[nCmds - 1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        for i, s in enumerate(wf.cmds):
            if extended:
                # with maxLen == -1 the slice only removes the padding blank
                shown = (str(s) + ' ')[:maxLen]
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, shown))
                else:
                    print(fmt2 % (' ', i + 1, shown))
            else:
                print("%-6s %-35s " % (wf.numId, stepNames))
                break
    print('')
    for i, n in enumerate(stepCounts):
        if n:
            print("%d workflows with %d steps" % (n, i + 1))

    return
381 
def createWorkFlows(self, fileNameIn):
    """Build WorkFlow objects for the entries read from one matrix file.

    fileNameIn -- matrix file name; must be a key of ``self.filesPrefMap``
                  (KeyError otherwise).

    Only the entries of ``self.workFlowSteps`` whose key carries the prefix of
    this file are used, in ascending order of workflow number.  The first
    entry seen for a name ``'<num>_<name>'`` is remembered in
    ``self.nameList``; later ones are reported on stdout.  One
    ``WorkFlow(num, name, commands=commands)`` is appended to
    ``self.workFlows`` per entry.  Returns None.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(num for (num, pref) in self.workFlowSteps.keys() if pref == prefixIn)
    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num) + '_' + name
        if nameId in self.nameList:
            print("==> duplicate name found for  %s" % (nameId,))
            print(" keeping :  %s" % (self.nameList[nameId],))
            print(" ignoring :  %s" % (val,))
        else:
            self.nameList[nameId] = val

        # NOTE(review): the WorkFlow is appended even for a name reported as
        # "ignoring" above; this follows the statement order of the original,
        # whose nesting cannot be recovered from the excerpt -- confirm.
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
408 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    useInput, refRel, fromScratch -- forwarded unchanged to ``readMatrix``.

    Files from ``self.files`` whose name does not contain ``self.what`` are
    skipped unless ``self.what`` is 'all'.  For each remaining file
    ``readMatrix`` and then ``createWorkFlows`` are called; an exception from
    either is reported on stdout and re-raised.
    """
    for matrixFile in self.files:
        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file %s" % (matrixFile,))
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file: %s %s" % (matrixFile, str(e)))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows : %s" % (str(e),))
            raise
427 
428 
def show(self, selected=None, extended=True):
    """Print the workflow listing followed by a separator line.

    selected, extended -- forwarded unchanged to ``showWorkFlows``.
    """
    self.showWorkFlows(selected, extended)
    # blank line, 80 dashes, then two line ends -- the same bytes the Python 2
    # statement  print '\n','-'*80,'\n'  produced
    print('\n' + '-' * 80 + ' \n')
433 
434 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory.

    The file is opened in binary mode, as the pickle module requires for
    its output stream, and is closed as soon as the dump is complete.
    """
    import pickle

    with open('theMatrix.pkl', 'wb') as pickleFile:
        pickle.dump(self.workFlows, pickleFile)

    return
441 
Definition: merge.py:1
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
dictionary map
Definition: Association.py:205
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77
void set(const std::string &name, int value)
set the flag, with a run-time name