CMS 3D CMS Logo

MatrixReader.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import sys, os
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
6 
7 # ================================================================================
8 
class MatrixException(Exception):
    """Exception carrying a plain text message for fatal matrix errors.

    The message given at construction is stored in ``msg`` and is what
    ``str()`` of the exception returns.
    """

    def __init__(self, msg):
        # initialise the base class too, so that ``args`` is populated
        super(MatrixException, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
14 
15 # ================================================================================
16 
18 
def __init__(self, opt):
    """Initialise the reader from an options object.

    ``opt.what`` is handed to ``reset()``; the remaining option values
    are copied onto the instance under the attribute names listed below.
    """
    self.reset(opt.what)

    # (attribute on self, attribute on opt), copied in this order
    optionMap = (
        ('wm', 'wmcontrol'),
        ('revertDqmio', 'revertDqmio'),
        ('addCommand', 'command'),
        ('apply', 'apply'),
        ('commandLineWf', 'workflow'),
        ('overWrite', 'overWrite'),
        ('noRun', 'noRun'),
    )
    for ownName, optName in optionMap:
        setattr(self, ownName, getattr(opt, optName))
32 
def reset(self, what='all'):
    """Put the reader back into its empty state.

    Stores ``what``, clears the collected workflow information and
    (re)creates the tables describing the known matrix files: the
    ordered list ``files``, the name prefix of each file
    (``filesPrefMap``) and whether the file is part of the default
    matrix (``filesDefault``).
    """
    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # one row per matrix file: (module name, prefix, used by default)
    matrixTable = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_2017', '2017-', True),
        ('relval_2023', '2023-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_unsch', 'unsch-', True),
        ('relval_premix', 'premix-', True),
    )

    self.filesPrefMap = dict((row[0], row[1]) for row in matrixTable)
    self.files = [row[0] for row in matrixTable]
    self.filesDefault = dict((row[0], row[2]) for row in matrixTable)

    self.relvalModule = None
93 
def makeCmd(self, step):
    """Turn a step dictionary into the pieces of a command line.

    Keys containing ``no_exec`` are dropped (the step is really meant to
    be run).  The values stored under ``cfg`` and ``input`` (compared
    case-insensitively) are not added to the option string but returned
    separately.  Every other entry contributes ``' <key> <value>'``.

    Returns the tuple ``(cfg, input, options)``; ``cfg`` and ``input``
    are ``None`` when the step does not define them.
    """
    cfg = None
    inputInfo = None  # do not shadow the builtin ``input``
    options = []
    for key, value in step.items():
        if 'no_exec' in key:
            continue  # we want to really run it ...
        lowered = key.lower()
        if lowered == 'cfg':
            cfg = value  # returned separately
        elif lowered == 'input':
            inputInfo = value  # returned separately
        else:
            options.append(' ' + key + ' ' + str(value))
    return cfg, inputInfo, ''.join(options)
113 
def makeStep(self, step, overrides):
    """Return ``step`` with the workflow ``overrides`` applied.

    When ``overrides`` is empty the ``step`` object itself is returned;
    otherwise the result of ``merge([overrides, step])`` is returned.
    """
    if len(overrides) == 0:
        return step
    # NOTE(review): ``merge`` is not imported anywhere in the visible part
    # of this file; it is presumed to come from relval_steps -- confirm.
    from Configuration.PyReleaseValidation.relval_steps import merge
    return merge([overrides] + [step])
121 
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one relval matrix module and collect its workflows.

    The module ``Configuration.PyReleaseValidation.<fileNameIn>`` is
    imported and kept in ``self.relvalModule``.  For every workflow it
    defines, one command per step is built and the result is stored in
    ``self.workFlowSteps[(num, prefix)]`` as
    ``(num, name, commands, ranStepList)``.

    Parameters:
      fileNameIn  -- name of the matrix module, a key of ``filesPrefMap``
      useInput    -- workflow numbers to start from an ``...INPUT`` step;
                     each item is ``'<num>'`` or ``'<num>:<level>'``, and
                     ``'all'`` may replace the number
      refRel      -- reference release(s) passed on to the module's
                     ``changeRefRelease``; a comma separated list must
                     have one entry per ``baseDataSetRelease``
      fromScratch -- workflow numbers to take out of ``useInput`` again

    Returns ``None``.  An import failure or a wrong number of reference
    releases is reported on stdout and ends the call early.  Raises
    ``MatrixException`` when a step defines both ``cfg`` and ``input``.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing", fileNameIn)

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file ", fileNameIn, str(e))
        return

    if useInput is not None:
        print("request for INPUT for ", useInput)

    # workflow number -> deepest step index allowed to be taken from INPUT
    fromInput = {}
    if useInput:
        for item in useInput:
            if ':' in item:
                (wfSel, levelStr) = item.split(':')
                level = int(levelStr)
            else:
                wfSel, level = item, 0
            if wfSel == 'all':
                for k in self.relvalModule.workflows.keys():
                    fromInput[float(k)] = level
            else:
                fromInput[float(wfSel)] = level

    if fromScratch:
        for num in map(float, fromScratch):
            fromInput.pop(num, None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        baseReleases = self.relvalModule.baseDataSetRelease
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(baseReleases):
                # report the mismatch instead of giving up silently
                print("ERROR: got", len(refRels), "reference releases but",
                      len(baseReleases), "are needed for", fileNameIn)
                return
            releasePairs = list(zip(baseReleases, refRels))
        else:
            releasePairs = [(x, refRel) for x in baseReleases]
        self.relvalModule.changeRefRelease(self.relvalModule.steps, releasePairs)

    steps = self.relvalModule.steps
    # cannot put a certain number of things in wm
    wmExcluded = ('RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT')

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        # NOTE: this is the module's own list; the INPUT handling below
        # changes it in place
        stepList = wfInfo[1]
        stepOverrides = wfInfo.overrides
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
        if len(wfInfo) >= 4:
            addTo = wfInfo[3]
            # pad with 0; '<' so that an over-long list cannot loop forever
            while len(addTo) < len(stepList):
                addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities: walk back from the deepest
        # allowed step and use the first '<step1>__..__<stepN>INPUT' found
        if num in fromInput:
            ilevel = fromInput[num]
            for stepI in range(min(ilevel, len(stepList) - 1), -1, -1):
                testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                if testName in steps.keys():
                    stepList[stepI] = testName
                    # drop the steps which the INPUT replaces
                    del stepList[:stepI]
                    break

        for stepName in stepList:
            if self.wm and stepName in wmExcluded:
                continue

            if len(name) > 0:
                name += '+'
            name += stepName

            stepDef = self.makeStep(steps[stepName], stepOverrides)
            if addCom and (not addTo or addTo[stepIndex] == 1):
                # NOTE(review): ``merge`` is not imported in the visible
                # part of this file; presumed to come from relval_steps.
                from Configuration.PyReleaseValidation.relval_steps import merge
                stepDef = merge(addCom + [stepDef])
            cfg, inputInfo, opts = self.makeCmd(stepDef)

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                # the input description itself stands in for the command
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py '+cfg+' '+opts
                else:
                    cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py'%(stepName,stepName)
                if self.addCommand:
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd += ' '+self.addCommand
                    else:
                        cmd += ' '+self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMIO','DQM')
                    cmd = cmd.replace('--filetype DQM','')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
296 
297 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write the raw command lists of the matrix files to text files.

    For every requested matrix file the reader is reset, the file is read
    with ``readMatrix`` and a file ``cmsDriver_<name>_hlt.txt`` is written
    in the current directory.  It holds one line per workflow (number,
    name, further steps and the first command or the input description)
    followed, unless ``step1Only`` is set, by one ``STEPn`` line per
    distinct (step name, command) pair of the later steps.

    Parameters:
      useInput, refRel, fromScratch -- passed on to ``readMatrix``
      what       -- 'all' or a string which must be part of the file name
      step1Only  -- only write the workflow lines, and skip workflows
                    whose first command is not a string
      selected   -- workflow numbers to restrict the output to
    """
    if selected:
        # a real container: it is searched once per workflow, which a
        # one-shot ``map`` iterator would not survive
        selected = set(float(x) for x in selected)

    # trick to skip the HImix IB test
    skippedIds = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and self.what not in matrixFile:
            print("ignoring non-requested file",matrixFile)
            continue

        if self.what == 'all' and not self.filesDefault[matrixFile]:
            print("ignoring file not used by default (enable with -w)",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file:", matrixFile, str(e))
            raise

        if not self.workFlowSteps: continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'

        print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
        ids = sorted(self.workFlowSteps.keys())
        # indexAndSteps[i] holds the (step name, command) pairs of step i+1
        indexAndSteps = []

        writtenWF = 0
        # the file is closed on every way out of this block
        with open(dataFileName, 'w') as outFile:
            for key in ids:
                if selected and key[0] not in selected:
                    continue
                if key[0] in skippedIds:
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]

                wfName, stepNames = name.split('+',1)

                stepNames = stepNames.replace('+RECODFROMRAWRECO','')
                stepNames = stepNames.replace('+SKIMCOSD','')
                stepNames = stepNames.replace('+SKIMD','')
                if '+HARVEST' in stepNames:
                    # find out automatically what to remove: everything
                    # from '+HARVEST' up to the next '+' (or the end)
                    exactb = stepNames.index('+HARVEST')
                    exacte = stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
                    stepNames = stepNames.replace(stepNames[exactb:exacte],'')
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+',1)[1]

                line = str(num) + ' ++ '+ wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' +otherSteps.replace('+',',')
                else:
                    line += ' ++ none'
                # a first command which is not a string is an input description
                inputInfo = None
                if not isinstance(commands[0],str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i,c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i+2:
                            indexAndSteps.append(set())
                        indexAndSteps[i+1].add((c,commands[i+1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only: continue
                    line += ' ++ REALDATA: '+inputInfo.dataSet
                    if inputInfo.run != []: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
                    line += ', FILES: ' +str(inputInfo.files)
                    line += ', EVENTS: '+str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' +inputInfo.label
                    line += ', LOCATION:'+inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ '+commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMIO','DQM')
                writtenWF += 1
                outFile.write(line+'\n')

            outFile.write('\n'+'\n')
            # as before, no step lines and no summary line in this mode
            if step1Only: continue

            for (index,s) in enumerate(indexAndSteps):
                for (stepName,cmd) in s:
                    stepIndex = index+1
                    if 'dasquery.log' in cmd: continue
                    line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
                    if self.revertDqmio == 'yes':
                        line = line.replace('DQMIO','DQM')
                    outFile.write(line+'\n')
                outFile.write('\n'+'\n')
        print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', dataFileName)
    return
400 
def workFlowsByLocation(self, cafVeto=True):
    """Return the workflows which can be used at the current location.

    A workflow is left out only when ``cafVeto`` is set, its first
    command is an ``InputInfo`` whose ``location`` is ``'CAF'``, and the
    ``CMS_PATH`` environment variable does not contain ``cms/caf/cms``.
    A missing ``CMS_PATH`` is treated as not being on CAF.
    """
    # Check if we are on CAF
    onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

    workflows = []
    for workflow in self.workFlows:
        firstCmd = workflow.cmds[0]
        if (cafVeto and not onCAF
                and isinstance(firstCmd, InputInfo)
                and firstCmd.location == 'CAF'):
            continue
        workflows.append(workflow)

    return workflows
415 
def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
    """Print the workflows returned by ``workFlowsByLocation(cafVeto)``.

    Parameters:
      selected -- workflow numbers to restrict the listing to; when given,
                  the commands are printed without the width limit
      extended -- print every command of a workflow instead of only its
                  number and step names
      cafVeto  -- passed on to ``workFlowsByLocation``

    Ends with a summary of how many of the listed workflows have a given
    number of steps.
    """
    if selected:
        # a list, because it is measured and searched repeatedly below
        selected = [float(x) for x in selected]
    wfs = self.workFlowsByLocation(cafVeto)
    maxLen = 100 # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of ", len(wfs), ' workflows:')
    if selected:
        print(" of which the following", len(selected), 'were selected:')
        #-ap for now:
        maxLen = -1 # for individual listing, no limit on width
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

    # stepCounts[k] is the number of listed workflows with k+1 commands
    stepCounts = []
    for wf in wfs:
        if selected and float(wf.numId) not in selected: continue
        if extended: print('')
        # pad with zeros
        while len(stepCounts) < len(wf.cmds):
            stepCounts.append(0)
        stepCounts[len(wf.cmds)-1] += 1
        wfName, stepNames = wf.nameId.split('+',1)
        for i, s in enumerate(wf.cmds):
            if extended:
                if i == 0:
                    print(fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen]))
                else:
                    print(fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen]))
            else:
                print("%-6s %-35s "% (wf.numId, stepNames))
                break
    print('')
    for i, n in enumerate(stepCounts):
        if n: print(n,'workflows with',i+1,'steps')

    return
452 
def createWorkFlows(self, fileNameIn):
    """Create the ``WorkFlow`` objects for one matrix file.

    Only the entries of ``self.workFlowSteps`` whose prefix belongs to
    ``fileNameIn`` are used, in ascending order of workflow number.  A
    duplicated ``<num>_<name>`` is reported on stdout and the first
    entry is kept in ``self.nameList``; a ``WorkFlow`` is appended to
    ``self.workFlows`` for every entry regardless.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    wfIds = sorted(wfId for (wfId, pref) in self.workFlowSteps.keys()
                   if pref == prefixIn)
    for wfId in wfIds:
        val = self.workFlowSteps[(wfId, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num)+'_'+name
        if nameId in self.nameList:
            print("==> duplicate name found for ", nameId)
            print(' keeping : ', self.nameList[nameId])
            print(' ignoring : ', val)
        else:
            self.nameList[nameId] = val

        self.workFlows.append(WorkFlow(num, name, commands=commands))

    return
479 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read the requested matrix files and create their workflows.

    A file is skipped when a specific ``what`` was requested which is not
    part of its name, or when everything was requested and the file is
    not part of the default matrix.  For the others ``readMatrix`` and
    then ``createWorkFlows`` are called; an exception from either is
    reported on stdout and raised again.
    """
    for matrixFile in self.files:
        if self.what == 'all':
            if not self.filesDefault[matrixFile]:
                print("ignoring",matrixFile,"from default matrix")
                continue
        elif self.what not in matrixFile:
            print("ignoring non-requested file",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as readError:
            print("ERROR reading file:", matrixFile, str(readError))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as createError:
            print("ERROR creating workflows :", str(createError))
            raise
501 
502 
def show(self, selected=None, extended=True, cafVeto=True):
    """Print the workflow listing followed by a separator line.

    All arguments are handed to ``showWorkFlows`` unchanged.
    """
    self.showWorkFlows(selected, extended, cafVeto)
    separator = '-'*80
    print('\n', separator, '\n')
507 
508 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
    import pickle

    # pickle writes bytes, so the file has to be opened in binary mode;
    # the ``with`` block makes sure it is flushed and closed
    with open('theMatrix.pkl', 'wb') as pklFile:
        pickle.dump(self.workFlows, pklFile)

    return
515 
Definition: merge.py:1
revertDqmio
maybe we want too level deep input
Definition: MatrixReader.py:24
def prepare(self, useInput=None, refRel='', fromScratch=None)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def makeCmd(self, step)
Definition: MatrixReader.py:94
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def makeStep(self, step, overrides)
def showWorkFlows(self, selected=None, extended=True, cafVeto=True)
def show(self, selected=None, extended=True, cafVeto=True)
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None)
def reset(self, what='all')
Definition: MatrixReader.py:33
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def createWorkFlows(self, fileNameIn)
def __init__(self, opt)
Definition: MatrixReader.py:19
def workFlowsByLocation(self, cafVeto=True)
#define str(s)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run