CMS 3D CMS Logo

MatrixReader.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import sys, os
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
6 
7 # ================================================================================
8 
class MatrixException(Exception):
    """Error raised when a workflow definition in the matrix is inconsistent.

    The human-readable description is stored in ``msg`` and is what
    ``str()`` returns.

    NOTE(review): the ``class`` statement is absent from this listing (its
    embedded line 9 is missing). The name is taken from the
    ``raise MatrixException(msg)`` in readMatrix; the ``Exception`` base class
    is assumed -- confirm against the original file.
    """

    def __init__(self, msg):
        # Forward the message so that ``args`` is populated like for any
        # other exception (helps logging and pickling).
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
14 
15 # ================================================================================
16 
18 
def __init__(self, opt):
    """Initialise the reader from a parsed option object.

    ``opt.what`` is handed to ``reset()``; the remaining options are copied
    one-to-one onto attributes of this object.
    """
    self.reset(opt.what)

    # (attribute on self, attribute on opt), copied in this order
    optionMap = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for ownName, optName in optionMap:
        setattr(self, ownName, getattr(opt, optName))
32 
def reset(self, what='all'):
    """Forget everything read so far and restore the static file tables.

    ``what`` is stored as-is in ``self.what``. The containers for workflow
    steps, workflow objects and workflow names are emptied, the tables
    describing the known matrix files are rebuilt and ``relvalModule`` is
    cleared.
    """
    self.what = what

    # raw per-workflow information, not yet WorkFlow objects
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # one row per matrix file: (module name, prefix, part of the default matrix)
    matrixTable = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_2017', '2017-', True),
        ('relval_2023', '2023-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_unsch', 'unsch-', True),
        ('relval_premix', 'premix-', True),
    )

    self.filesPrefMap = dict((fileName, pref) for fileName, pref, _ in matrixTable)
    self.files = [fileName for fileName, _, _ in matrixTable]
    self.filesDefault = dict((fileName, isDefault) for fileName, _, isDefault in matrixTable)

    self.relvalModule = None
93 
def makeCmd(self, step):
    """Split a step dictionary into ``(cfg, input, options)``.

    Keys containing ``no_exec`` are dropped (the step is really meant to be
    run). The values stored under ``cfg`` and ``input`` (matched
    case-insensitively) are returned separately; every other key/value pair
    is appended to the option string as `` key value``.
    """
    cfg = None
    inputValue = None
    optionParts = []

    for key, value in step.items():
        if 'no_exec' in key:
            continue

        lowered = key.lower()
        if lowered == 'cfg':
            cfg = value
        elif lowered == 'input':
            inputValue = value
        else:
            optionParts.append(' ' + key + ' ' + str(value))

    return cfg, inputValue, ''.join(optionParts)
113 
def makeStep(self, step, overrides):
    """Return ``step`` with ``overrides`` applied.

    Without overrides the very same ``step`` object is handed back;
    otherwise the result of ``merge([overrides, step])`` is returned.
    """
    # NOTE(review): ``merge`` is not imported anywhere in the visible listing;
    # embedded line 115 is missing here and presumably held that import --
    # confirm against the original file.
    if len(overrides) == 0:
        return step
    return merge([overrides] + [step])
121 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and build the command list of each workflow.

    The module ``Configuration.PyReleaseValidation.<fileNameIn>`` is imported
    and kept in ``self.relvalModule``. For every entry of its ``workflows``
    mapping, ``self.workFlowSteps[(num, prefix)]`` is set to
    ``(num, name, commands, ranStepList)``, where ``prefix`` comes from
    ``self.filesPrefMap`` and ``commands`` holds either a cmsDriver.py command
    string or the step's ``input`` object.

    Parameters:
      fileNameIn  -- key of ``self.filesPrefMap`` / name of the matrix module.
      useInput    -- iterable of strings ``'<wf>'``, ``'<wf>:<level>'``,
                     ``'all'`` or ``'all:<level>'`` selecting workflows whose
                     leading steps may be replaced by an ``...INPUT`` step.
      refRel      -- reference release(s) passed to the module's
                     ``changeRefRelease``; a comma-separated list must have
                     one entry per ``baseDataSetRelease``.
      fromScratch -- iterable of workflow numbers removed again from the
                     ``useInput`` selection.

    Returns None. An import failure, or a ``refRel`` list of the wrong
    length, is reported on stdout and the file is skipped.

    Raises MatrixException if a step defines both ``cfg`` and ``input``.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing", fileNameIn)

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file ", fileNameIn, str(e))
        return

    if useInput is not None:
        print("request for INPUT for ", useInput)

    # workflow number -> highest step index that may be replaced by INPUT
    fromInput = {}

    if useInput:
        for item in useInput:
            if ':' in item:
                (wfKey, level) = item.split(':')
                level = int(level)
            else:
                wfKey, level = item, 0
            if wfKey == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(wfKey)] = level

    if fromScratch:
        for num in map(float, fromScratch):
            fromInput.pop(num, None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # Previously this returned without a word; say why the file is skipped.
                print("ERROR: got", len(refRels), "reference releases but",
                      len(baseReleases), "are required; skipping", fileNameIn)
                return
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                list(zip(baseReleases, refRels))
            )
        else:
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                [(x, refRel) for x in baseReleases]
            )

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        stepList = wfInfo[1]
        stepOverrides = wfInfo.overrides
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
            if len(wfInfo) >= 4:
                addTo = wfInfo[3]
                # pad with 0; '<' (not '!=') so that an over-long list
                # cannot make this loop run forever
                while len(addTo) < len(stepList):
                    addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: walk backwards from the deepest
        # allowed step and use the first matching '...INPUT' step found
        if num in fromInput:
            ilevel = fromInput[num]
            for (stepIr, step) in enumerate(reversed(stepList)):
                stepI = (len(stepList) - stepIr) - 1
                if stepI > ilevel:
                    continue
                if stepI != 0:
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                else:
                    testName = step + 'INPUT'
                if testName in self.relvalModule.steps.keys():
                    stepList[stepI] = testName
                    # drop the steps that the INPUT step stands in for
                    del stepList[:stepI]
                    break

        for (stepI, step) in enumerate(stepList):
            stepName = step
            if self.relvalModule.steps[stepName] is None:
                continue
            if self.wm:
                # cannot put a certain number of things in wm
                if stepName in ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT'):
                    continue

            if name:
                name += '+'
            name += stepName

            stepDict = self.makeStep(self.relvalModule.steps[stepName], stepOverrides)
            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): ``merge`` is not imported in the visible
                # listing; embedded line 261 is missing here and presumably
                # held that import -- confirm against the original file.
                stepDict = merge(addCom + [stepDict])
            cfg, inputInfo, opts = self.makeCmd(stepDict)

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input object itself takes the place of a command string
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py '+cfg+' '+opts
                else:
                    cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd += ' ' + self.addCommand
                    else:
                        cmd += ' ' + self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMIO', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
298 
299 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Dump the commands of every requested matrix file to a text file.

    For each entry of ``self.files`` that is requested (``what``) or part of
    the default matrix, the reader is reset, the file is read with
    ``readMatrix`` and a file ``cmsDriver_<name>_hlt.txt`` is written to the
    current directory: one line per workflow, followed (unless ``step1Only``)
    by one ``STEPn`` line per distinct later-step command.

    Parameters:
      useInput, refRel, fromScratch -- forwarded unchanged to ``readMatrix``.
      what       -- passed to ``reset``; ``'all'`` or a substring of the
                    wanted matrix file names.
      step1Only  -- write only the first-step lines and skip workflows whose
                    first command is not a string.
      selected   -- optional iterable of workflow numbers; only those are written.

    Errors from ``readMatrix`` are reported on stdout and re-raised.
    """
    if selected:
        # Materialise once: on Python 3 map() is a one-shot iterator that the
        # first membership test below would exhaust.
        selected = set(map(float, selected))

    # trick to skip the HImix IB test
    hiMixVeto = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file",matrixFile)
            continue

        if self.what == 'all' and not self.filesDefault[matrixFile]:
            print("ignoring file not used by default (enable with -w)",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file:", matrixFile, str(e))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
        # 'with' closes the file on every path, including step1Only and errors
        with open(dataFileName, 'w') as outFile:

            print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
            # indexAndSteps[i] collects the distinct (step name, command)
            # pairs seen at step position i
            indexAndSteps = []

            writtenWF = 0
            for key in sorted(self.workFlowSteps.keys()):
                if selected and key[0] not in selected:
                    continue
                if key[0] in hiMixVeto:
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]

                wfName, stepNames = name.split('+', 1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO', '')
                stepNames = stepNames.replace('+SKIMCOSD', '')
                stepNames = stepNames.replace('+SKIMD', '')
                if 'HARVEST' in stepNames:
                    # find out automatically what to remove
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.index('+', exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
                    stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i+2:
                            indexAndSteps.append(set())
                        indexAndSteps[i+1].add((c, commands[i+1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: '+inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:'+'|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: '+str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:'+inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ '+commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMIO', 'DQM')
                writtenWF += 1
                outFile.write(line+'\n')

            outFile.write('\n'+'\n')

            if not step1Only:
                for (index, s) in enumerate(indexAndSteps):
                    for (stepName, cmd) in s:
                        stepIndex = index+1
                        if 'dasquery.log' in cmd:
                            continue
                        line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ '+cmd
                        if self.revertDqmio == 'yes':
                            line = line.replace('DQMIO', 'DQM')
                        outFile.write(line+'\n')
                outFile.write('\n'+'\n')

        # as before, no summary line is printed in step1Only mode
        if step1Only:
            continue
        print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name)
    return
402 
def workFlowsByLocation(self, cafVeto=True):
    """Return the workflows of ``self.workFlows`` that can run at this site.

    A workflow whose first command is an ``InputInfo`` with location
    ``'CAF'`` is dropped when ``cafVeto`` is true and the ``CMS_PATH``
    environment variable does not contain ``cms/caf/cms``. All other
    workflows are returned in their original order.
    """
    # Check if we are on CAF; an unset CMS_PATH simply means "not on CAF"
    # instead of aborting with a KeyError.
    onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

    workflows = []
    for workflow in self.workFlows:
        if isinstance(workflow.cmds[0], InputInfo):
            if cafVeto and (workflow.cmds[0].location == 'CAF' and not onCAF):
                continue
        workflows.append(workflow)

    return workflows
417 
def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
    """Print the workflows returned by ``workFlowsByLocation(cafVeto)``.

    Parameters:
      selected -- optional iterable of workflow numbers; when given, only
                  those workflows are listed and commands are printed
                  without the summary width limit.
      extended -- print every command of a workflow (one line each) instead
                  of a single line with the step names.
      cafVeto  -- forwarded to ``workFlowsByLocation``.

    A count of workflows per number of steps is printed at the end.
    """
    if selected:
        # A real list is required: len() is taken below and membership is
        # tested once per workflow (a Python 3 map object supports neither).
        selected = [float(x) for x in selected]
    wfs = self.workFlowsByLocation(cafVeto)
    maxLen = 100 # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of ", len(wfs), ' workflows:')
    if selected:
        print(" of which the following", len(selected), 'were selected:')
        maxLen = -1 # for individual listing, no limit on width
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

    # N[k] counts the listed workflows that have k+1 commands
    N = []
    for wf in wfs:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        # pad with zeros
        while len(N) < len(wf.cmds):
            N.append(0)
        N[len(wf.cmds)-1] += 1
        wfName, stepNames = wf.nameId.split('+', 1)
        for i, s in enumerate(wf.cmds):
            if extended:
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen]))
                else:
                    print(fmt2 % (' ', i+1, (str(s)+' ')[:maxLen]))
            else:
                # compact mode: one line per workflow is enough
                print("%-6s %-35s "% (wf.numId, stepNames))
                break
    print('')
    for i, n in enumerate(N):
        if n:
            print(n,'workflows with',i+1,'steps')

    return
454 
def createWorkFlows(self, fileNameIn):
    """Create WorkFlow objects for the steps read from one matrix file.

    Only the entries of ``self.workFlowSteps`` whose prefix matches the one
    registered for ``fileNameIn`` are used, in ascending order of their
    workflow number. Names seen for the first time are recorded in
    ``self.nameList``; repeated names are reported on stdout.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys() if pref == prefixIn)

    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num) + '_' + name
        if nameId not in self.nameList:
            self.nameList[nameId] = val
        else:
            print("==> duplicate name found for ", nameId)
            print(' keeping : ', self.nameList[nameId])
            print(' ignoring : ', val)

        # NOTE(review): indentation is lost in this listing; the append is
        # taken to be at loop level (it follows a blank line after the
        # else branch), i.e. it also runs for duplicates -- confirm.
        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
481 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    A file is processed when ``self.what`` is a substring of its name, or,
    for ``self.what == 'all'``, when it is flagged in ``self.filesDefault``.
    The three arguments are forwarded unchanged to ``readMatrix``. Any
    exception is reported on stdout and then re-raised.
    """
    for matrixFile in self.files:
        if self.what == 'all':
            if not self.filesDefault[matrixFile]:
                print("ignoring",matrixFile,"from default matrix")
                continue
        elif self.what not in matrixFile:
            print("ignoring non-requested file",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as readError:
            print("ERROR reading file:", matrixFile, str(readError))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as createError:
            print("ERROR creating workflows :", str(createError))
            raise
503 
504 
def show(self, selected=None, extended=True, cafVeto=True):
    """Print the workflow listing followed by a separator line.

    All three arguments are forwarded unchanged to ``showWorkFlows``.
    """
    self.showWorkFlows(selected, extended, cafVeto)

    separator = '-' * 80
    print('\n', separator, '\n')
509 
510 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
    import pickle

    # Binary mode is required by pickle on Python 3, and the context manager
    # makes sure the data is flushed and the handle released.
    with open('theMatrix.pkl', 'wb') as dbFile:
        pickle.dump(self.workFlows, dbFile)

    return
517 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:24
def prepare(self, useInput=None, refRel='', fromScratch=None)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def makeCmd(self, step)
Definition: MatrixReader.py:94
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def makeStep(self, step, overrides)
def showWorkFlows(self, selected=None, extended=True, cafVeto=True)
def show(self, selected=None, extended=True, cafVeto=True)
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None)
def reset(self, what='all')
Definition: MatrixReader.py:33
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def createWorkFlows(self, fileNameIn)
def __init__(self, opt)
Definition: MatrixReader.py:19
def workFlowsByLocation(self, cafVeto=True)
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run