CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MatrixReader.py
Go to the documentation of this file.
1 import sys, os
2 
3 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
4 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
5 
6 # ================================================================================
7 
class MatrixException(Exception):
    """Fatal error raised by MatrixReader while building workflow commands.

    The message is kept in ``msg`` and is also what ``str()`` returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args`` is populated
        # (keeps repr() and pickling of the exception meaningful).
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
13 
14 # ================================================================================
15 
class MatrixReader(object):
    """Read the relval matrix modules and turn them into runnable workflows.

    Each matrix file (``relval_*``) is imported from
    ``Configuration.PyReleaseValidation``; its ``workflows`` and ``steps``
    are combined into command lists (``workFlowSteps``) from which
    ``WorkFlow`` objects are created.

    All ``print`` calls take a single pre-formatted string so the class is
    valid under both Python 2 and Python 3 without a ``__future__`` import.
    """

    def __init__(self, opt):
        """Store the options steering the matrix.

        ``opt`` must provide the attributes ``what``, ``wmcontrol``,
        ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite``
        and ``noRun``.
        """
        self.reset(opt.what)

        self.wm = opt.wmcontrol
        self.revertDqmio = opt.revertDqmio
        self.addCommand = opt.command
        self.apply = opt.apply
        self.commandLineWf = opt.workflow
        self.overWrite = opt.overWrite

        self.noRun = opt.noRun
        return

    def reset(self, what='all'):
        """Clear all collected workflow information and (re)build the file tables.

        ``what`` selects the matrix files to use: ``'all'`` or a substring
        of a matrix file name.
        """
        self.what = what

        # a bunch of information, but not yet the WorkFlow object
        self.workFlowSteps = {}
        # the actual WorkFlow objects
        self.workFlows = []
        self.nameList = {}

        # prefix stored with each workflow key, per matrix file
        self.filesPrefMap = {
            'relval_standard': 'std-',
            'relval_highstats': 'hi-',
            'relval_pileup': 'PU-',
            'relval_generator': 'gen-',
            'relval_extendedgen': 'genExt-',
            'relval_production': 'prod-',
            'relval_ged': 'ged-',
            'relval_upgrade': 'upg-',
            'relval_2017': '2017-',
            'relval_identity': 'id-',
            'relval_machine': 'mach-',
            'relval_unsch': 'unsch-',
            'relval_premix': 'premix-',
        }

        # matrix files, in the order in which they are processed
        self.files = [
            'relval_standard',
            'relval_highstats',
            'relval_pileup',
            'relval_generator',
            'relval_extendedgen',
            'relval_production',
            'relval_ged',
            'relval_upgrade',
            'relval_2017',
            'relval_identity',
            'relval_machine',
            'relval_unsch',
            'relval_premix',
        ]

        # whether a file is processed when what == 'all'
        self.filesDefault = {
            'relval_standard': True,
            'relval_highstats': True,
            'relval_pileup': True,
            'relval_generator': True,
            'relval_extendedgen': True,
            'relval_production': True,
            'relval_ged': True,
            'relval_upgrade': False,
            'relval_2017': True,
            'relval_identity': False,
            'relval_machine': True,
            'relval_unsch': True,
            'relval_premix': True,
        }

        self.relvalModule = None

        return

    def makeCmd(self, step):
        """Split a step dictionary into ``(cfg, input, options)``.

        Keys containing ``no_exec`` are dropped; the ``cfg`` and ``input``
        entries (matched case-insensitively) are returned separately; every
        other ``key value`` pair is appended, space separated, to the option
        string in the iteration order of ``step``.
        """
        cfg = None
        inputInfo = None
        options = []
        for key, value in step.items():
            if 'no_exec' in key:
                continue  # we want to really run it ...
            lowered = key.lower()
            if lowered == 'cfg':
                cfg = value  # not part of the options, returned separately
            elif lowered == 'input':
                inputInfo = value  # not part of the options, returned separately
            else:
                options.append(' ' + key + ' ' + str(value))
        return cfg, inputInfo, ''.join(options)

    def makeStep(self, step, overrides):
        """Return ``step`` merged with ``overrides``, or ``step`` itself if there are none."""
        if len(overrides.keys()) == 0:
            return step
        # NOTE(review): the line importing ``merge`` was missing from the
        # reviewed listing; relval_steps is presumed to be where it lives -
        # confirm against the repository.
        from Configuration.PyReleaseValidation.relval_steps import merge
        return merge([overrides] + [step])

    def _resolveInputRequests(self, useInput, fromScratch):
        """Build the ``{workflow number: input level}`` map for INPUT requests.

        Each entry of ``useInput`` is ``'<id>'`` or ``'<id>:<level>'`` (level
        defaults to 0); the id ``'all'`` applies to every workflow of the
        currently loaded matrix module.  Numbers listed in ``fromScratch``
        are removed again.
        """
        fromInput = {}
        for request in (useInput or []):
            if ':' in request:
                wfId, level = request.split(':')
                level = int(level)
            else:
                wfId, level = request, 0
            if wfId == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(wfId)] = level

        for num in (fromScratch or []):
            fromInput.pop(float(num), None)
        return fromInput

    def _buildCommand(self, stepName, stepIndex, cfg, opts):
        """Assemble the cmsDriver.py command line for one step."""
        if cfg:
            cmd = 'cmsDriver.py ' + cfg + ' ' + opts
        else:
            cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
        if self.wm:
            cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
        if self.addCommand:
            # with self.apply set, the extra command only goes to the
            # steps listed there (by index or by name)
            if not self.apply or stepIndex in self.apply or stepName in self.apply:
                cmd += ' ' + self.addCommand
        if self.wm and self.revertDqmio == 'yes':
            cmd = cmd.replace('DQMIO', 'DQM')
            cmd = cmd.replace('--filetype DQM', '')
        return cmd

    def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
        """Import one matrix file and fill ``self.workFlowSteps`` from it.

        For every workflow of the module an entry
        ``(num, prefix) -> (num, name, commands, ranStepList)`` is stored.
        Returns ``None`` early if the module cannot be imported or if the
        number of comma separated reference releases in ``refRel`` does not
        match the module's ``baseDataSetRelease``.

        Raises ``MatrixException`` when a step defines both ``cfg`` and
        ``input``.

        Note that the step lists of the imported module are modified in
        place when an INPUT variant replaces the leading steps.
        """
        prefix = self.filesPrefMap[fileNameIn]

        print("processing %s" % (fileNameIn,))

        moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
        try:
            __import__(moduleName)
            self.relvalModule = sys.modules[moduleName]
        except Exception as e:
            print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
            return

        if useInput is not None:
            print("request for INPUT for  %s" % (useInput,))

        fromInput = self._resolveInputRequests(useInput, fromScratch)

        # overwrite steps
        if self.overWrite:
            for p in self.overWrite:
                self.relvalModule.steps.overwrite(p)

        # change the origin of dataset on the fly
        if refRel:
            baseReleases = self.relvalModule.baseDataSetRelease
            if ',' in refRel:
                refRels = refRel.split(',')
                if len(refRels) != len(baseReleases):
                    print("ERROR: %d reference releases given, %d expected"
                          % (len(refRels), len(baseReleases)))
                    return
                replacements = list(zip(baseReleases, refRels))
            else:
                replacements = [(x, refRel) for x in baseReleases]
            self.relvalModule.changeRefRelease(self.relvalModule.steps, replacements)

        steps = self.relvalModule.steps
        # steps that cannot be put in wm
        wmExcluded = ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

        for num, wfInfo in self.relvalModule.workflows.items():
            commands = []
            wfName = wfInfo[0]
            stepList = wfInfo[1]
            stepOverrides = wfInfo.overrides
            # if no explicit name given for the workflow, use the name of step1
            if wfName.strip() == '':
                wfName = stepList[0]
            # option to specialize the wf as the third item in the WF list
            addTo = None
            addCom = None
            if len(wfInfo) >= 3:
                addCom = wfInfo[2]
                if not isinstance(addCom, list):
                    addCom = [addCom]
            if len(wfInfo) >= 4:
                addTo = wfInfo[3]
                # pad with 0; '<' rather than '!=' so that an addTo longer
                # than stepList cannot loop forever
                while len(addTo) < len(stepList):
                    addTo.append(0)

            name = wfName
            stepIndex = 0
            ranStepList = []

            # first resolve INPUT possibilities: starting from the deepest
            # allowed step, look for '<step1>__...__<stepN>INPUT' and, when
            # found, let it replace steps 1..N
            if num in fromInput:
                ilevel = fromInput[num]
                for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                    if testName in steps.keys():
                        stepList[stepI] = testName
                        # drop the steps now covered by the INPUT step
                        del stepList[:stepI]
                        break

            for stepName in stepList:
                if self.wm and stepName in wmExcluded:
                    continue

                if len(name) > 0:
                    name += '+'
                name += stepName

                stepDict = self.makeStep(steps[stepName], stepOverrides)
                if addCom and (not addTo or addTo[stepIndex] == 1):
                    # NOTE(review): the line importing ``merge`` was missing
                    # from the reviewed listing; relval_steps is presumed to
                    # be where it lives - confirm against the repository.
                    from Configuration.PyReleaseValidation.relval_steps import merge
                    stepDict = merge(addCom + [stepDict])
                cfg, inputInfo, opts = self.makeCmd(stepDict)

                if inputInfo and cfg:
                    msg = "FATAL ERROR: found both cfg and input for workflow " + str(num) + ' step ' + stepName
                    raise MatrixException(msg)

                if inputInfo:
                    # the input description object itself is the "command"
                    cmd = inputInfo
                    if self.noRun:
                        cmd.run = []
                else:
                    cmd = self._buildCommand(stepName, stepIndex, cfg, opts)

                commands.append(cmd)
                ranStepList.append(stepName)
                stepIndex += 1

            self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

        return

    def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None):
        """Write one ``cmsDriver_*_hlt.txt`` file per requested matrix file.

        Each file lists the (optionally ``selected``) workflows with their
        first command, followed - unless ``step1Only`` is set - by the
        commands of the later steps grouped by step position.
        """
        if selected:
            selected = [float(x) for x in selected]

        # workflow numbers skipped on purpose (HImix IB test)
        skipped = (203.1, 204.1, 205.1, 4.51, 4.52)

        for matrixFile in self.files:

            self.reset(what)

            if self.what != 'all' and self.what not in matrixFile:
                print("ignoring non-requested file %s" % (matrixFile,))
                continue

            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring file not used by default (enable with -w) %s" % (matrixFile,))
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file: %s %s" % (matrixFile, str(e)))
                raise

            if not self.workFlowSteps:
                continue

            dataFileName = matrixFile.replace('relval_', 'cmsDriver_') + '_hlt.txt'

            # the context manager closes the file on every exit path,
            # including exceptions and the step1Only 'continue' below
            with open(dataFileName, 'w') as outFile:

                print("found  %s  workflows for  %s" % (len(self.workFlowSteps.keys()), dataFileName))
                indexAndSteps = []

                writtenWF = 0
                for key in sorted(self.workFlowSteps.keys()):
                    if selected and not (key[0] in selected):
                        continue
                    # trick to skip the HImix IB test
                    if key[0] in skipped:
                        continue
                    num, name, commands, stepList = self.workFlowSteps[key]

                    wfName, stepNames = name.split('+', 1)

                    stepNames = stepNames.replace('+RECODFROMRAWRECO', '')
                    stepNames = stepNames.replace('+SKIMCOSD', '')
                    stepNames = stepNames.replace('+SKIMD', '')
                    if 'HARVEST' in stepNames:
                        # find out automatically what to remove: from
                        # '+HARVEST' up to the next '+' or the end
                        exactb = stepNames.index('+HARVEST')
                        if '+' in stepNames[exactb + 1:]:
                            exacte = stepNames.index('+', exactb + 1)
                        else:
                            exacte = len(stepNames)
                        stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                    otherSteps = None
                    if '+' in stepNames:
                        _, otherSteps = stepNames.split('+', 1)

                    line = str(num) + ' ++ ' + wfName
                    if otherSteps and not step1Only:
                        line += ' ++ ' + otherSteps.replace('+', ',')
                    else:
                        line += ' ++ none'
                    inputInfo = None
                    if not isinstance(commands[0], str):
                        inputInfo = commands[0]
                    if otherSteps:
                        for (i, c) in enumerate(otherSteps.split('+')):
                            # pad with set
                            while len(indexAndSteps) < i + 2:
                                indexAndSteps.append(set())
                            indexAndSteps[i + 1].add((c, commands[i + 1]))

                    if inputInfo:
                        # skip the samples from INPUT when step1Only is on
                        if step1Only:
                            continue
                        line += ' ++ REALDATA: ' + inputInfo.dataSet
                        if inputInfo.run != []:
                            line += ', RUN:' + '|'.join(map(str, inputInfo.run))
                        line += ', FILES: ' + str(inputInfo.files)
                        line += ', EVENTS: ' + str(inputInfo.events)
                        if inputInfo.label != '':
                            line += ', LABEL: ' + inputInfo.label
                        line += ', LOCATION:' + inputInfo.location
                        line += ' @@@'
                    else:
                        line += ' @@@ ' + commands[0]
                    if self.revertDqmio == 'yes':
                        line = line.replace('DQMIO', 'DQM')
                    writtenWF += 1
                    outFile.write(line + '\n')

                outFile.write('\n' + '\n')
                if step1Only:
                    continue

                for (index, stepSet) in enumerate(indexAndSteps):
                    for (stepName, cmd) in stepSet:
                        stepIndex = index + 1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line + '\n')
                    outFile.write('\n' + '\n')

            print("wrote  %s  workflow%s  to  %s"
                  % (writtenWF, 's' if (writtenWF != 1) else '', dataFileName))
        return

    def workFlowsByLocation(self, cafVeto=True):
        """Return the workflows runnable here.

        With ``cafVeto`` set, workflows whose first command is an
        ``InputInfo`` located on ``'CAF'`` are left out unless ``CMS_PATH``
        indicates a CAF installation.
        """
        # Check if we are on CAF; an unset CMS_PATH counts as "not on CAF"
        # instead of raising KeyError.
        onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

        workflows = []
        for workflow in self.workFlows:
            firstCmd = workflow.cmds[0]
            if isinstance(firstCmd, InputInfo):
                if cafVeto and (firstCmd.location == 'CAF' and not onCAF):
                    continue
            workflows.append(workflow)

        return workflows

    def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
        """Print the available (or ``selected``) workflows and a per-length summary.

        With ``extended`` every command of a workflow is printed, otherwise
        only one line per workflow.
        """
        if selected:
            selected = [float(x) for x in selected]
        wfs = self.workFlowsByLocation(cafVeto)
        maxLen = 100  # for summary, limit width of output
        fmt1 = "%-6s %-35s [1]: %s ..."
        fmt2 = " %35s [%d]: %s ..."
        print("\nfound a total of  %s  workflows:" % (len(wfs),))
        if selected:
            print(" of which the following %s were selected:" % (len(selected),))
        # -ap for now:
        # NOTE(review): indentation was lost in the reviewed listing; this
        # override is taken to be unconditional - confirm against the
        # repository.
        maxLen = -1  # for individual listing, no limit on width
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

        N = []  # N[k] counts the workflows with k+1 commands
        for wf in wfs:
            if selected and float(wf.numId) not in selected:
                continue
            if extended:
                print('')
            # pad with zeros
            while len(N) < len(wf.cmds):
                N.append(0)
            N[len(wf.cmds) - 1] += 1
            _, stepNames = wf.nameId.split('+', 1)
            for i, s in enumerate(wf.cmds):
                if extended:
                    if i == 0:
                        print(fmt1 % (wf.numId, stepNames, (str(s) + ' ')[:maxLen]))
                    else:
                        print(fmt2 % (' ', i + 1, (str(s) + ' ')[:maxLen]))
                else:
                    print("%-6s %-35s " % (wf.numId, stepNames))
                    break
        print('')
        for i, n in enumerate(N):
            if n:
                print("%s workflows with %s steps" % (n, i + 1))

        return

    def createWorkFlows(self, fileNameIn):
        """Create ``WorkFlow`` objects for the steps read from ``fileNameIn``.

        Only entries of ``self.workFlowSteps`` stored under the prefix of
        ``fileNameIn`` are used, in ascending order of workflow number.
        Duplicate ``<num>_<name>`` identifiers are reported.
        """
        prefixIn = self.filesPrefMap[fileNameIn]

        # get through the list of items and update the requested workflows only
        ids = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys() if pref == prefixIn)
        for key in ids:
            val = self.workFlowSteps[(key, prefixIn)]
            num, name, commands, stepList = val
            nameId = str(num) + '_' + name
            if nameId in self.nameList:
                print("==> duplicate name found for  %s" % (nameId,))
                print(" keeping :  %s" % (self.nameList[nameId],))
                print(" ignoring :  %s" % (val,))
            else:
                self.nameList[nameId] = val

            self.workFlows.append(WorkFlow(num, name, commands=commands))

        return

    def prepare(self, useInput=None, refRel='', fromScratch=None):
        """Read every requested matrix file and create its workflows.

        Errors from reading or workflow creation are reported and re-raised.
        """
        for matrixFile in self.files:
            if self.what != 'all' and self.what not in matrixFile:
                print("ignoring non-requested file %s" % (matrixFile,))
                continue
            if self.what == 'all' and not self.filesDefault[matrixFile]:
                print("ignoring %s from default matrix" % (matrixFile,))
                continue

            try:
                self.readMatrix(matrixFile, useInput, refRel, fromScratch)
            except Exception as e:
                print("ERROR reading file: %s %s" % (matrixFile, str(e)))
                raise

            try:
                self.createWorkFlows(matrixFile)
            except Exception as e:
                print("ERROR creating workflows : %s" % (str(e),))
                raise

    def show(self, selected=None, extended=True, cafVeto=True):
        """Print the workflow listing followed by a separator line."""
        self.showWorkFlows(selected, extended, cafVeto)
        print('\n' + '-' * 80 + ' \n')

    def updateDB(self):
        """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
        import pickle
        # binary mode is required by pickle under Python 3; the context
        # manager makes sure the file is flushed and closed
        with open('theMatrix.pkl', 'wb') as dbFile:
            pickle.dump(self.workFlows, dbFile)

        return
512 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:23
void add(const std::vector< const T * > &source, std::vector< const T * > &dest)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run