CMS 3D CMS Logo

MatrixReader.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import sys, os
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
6 
7 # ================================================================================
8 
# Initialiser: keep the caller-supplied message on the instance.
# NOTE(review): the enclosing `class` line is missing from this listing;
# presumably this is MatrixException (the type raised in readMatrix) - confirm.
10  def __init__(self, msg):
11  self.msg = msg
# The string form of the object is the stored message, unchanged.
12  def __str__(self):
13  return self.msg
14 
15 # ================================================================================
16 
18 
def __init__(self, opt):
    """Set up the reader from an options object.

    `opt` must expose the attributes read below (what, wmcontrol,
    revertDqmio, command, apply, workflow, overWrite, noRun).
    """
    # Build the file catalogue and empty containers for this selection.
    self.reset(opt.what)

    # Values consulted while the commands are assembled.
    self.wm = opt.wmcontrol
    self.revertDqmio = opt.revertDqmio
    self.addCommand = opt.command
    self.apply = opt.apply
    self.commandLineWf = opt.workflow
    self.overWrite = opt.overWrite

    # When set, the run list of INPUT steps is cleared in readMatrix.
    self.noRun = opt.noRun
32 
def reset(self, what='all'):
    """Clear all per-run state and rebuild the catalogue of relval files.

    what -- selection string stored on the instance ('all' by default).
    """
    self.what = what

    # Raw per-workflow information, not yet WorkFlow objects.
    self.workFlowSteps = {}
    # The actual WorkFlow objects.
    self.workFlows = []
    self.nameList = {}

    # One row per relval file: (module name, prefix, used by default).
    # The three containers below are derived from it so they cannot drift.
    catalogue = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_cleanedupgrade', 'clnupg-', False),
        ('relval_gpu', 'gpu-', False),
        ('relval_2017', '2017-', True),
        ('relval_2026', '2026-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_premix', 'premix-', True),
        ('relval_nano', 'nano-', True),
        ('relval_data_highstats', 'data-', False),
    )

    self.filesPrefMap = {}
    self.files = []
    self.filesDefault = {}
    for moduleName, prefix, usedByDefault in catalogue:
        self.filesPrefMap[moduleName] = prefix
        self.files.append(moduleName)
        self.filesDefault[moduleName] = usedByDefault

    # Set by readMatrix once a relval module has been imported.
    self.relvalModule = None
102 
def makeCmd(self, step):
    """Split a step dictionary into (cfg, input, option string).

    Keys containing 'no_exec' are dropped. The values stored under
    'cfg' and 'input' (matched case-insensitively) are returned
    separately. Every other pair is appended to the option string as
    ' <key> <value>'.
    """
    cfgValue = None
    inputValue = None
    pieces = []
    for key, value in step.items():
        if 'no_exec' in key:
            # We want the command to really run.
            continue
        lowered = key.lower()
        if lowered == 'cfg':
            cfgValue = value
        elif lowered == 'input':
            inputValue = value
        else:
            pieces.append(' ' + key + ' ' + str(value))
    return cfgValue, inputValue, ''.join(pieces)
122 
def makeStep(self, step, overrides):
    """Return `step`, merged with `overrides` when any are given.

    With empty overrides the very same `step` object is handed back.
    NOTE(review): `merge` must already be in scope; the line that
    provides it is not visible in this listing.
    """
    if len(overrides) == 0:
        return step
    # Overrides come first in the list passed to merge().
    return merge([overrides, step])
130 
# readMatrix: import the module 'Configuration.PyReleaseValidation.<fileNameIn>'
# and, for every entry of its `workflows`, assemble the list of commands.
# Each result is stored as
#   self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)
#
#   fileNameIn  - key of self.filesPrefMap; also the module name to import.
#   useInput    - optional iterable of strings '<wf>' or '<wf>:<level>'
#                 ('all' is accepted as <wf>) selecting workflows for which an
#                 '...INPUT' variant of a step is looked up.
#   refRel      - optional release string, or a comma-separated list that must
#                 have as many entries as relvalModule.baseDataSetRelease.
#   fromScratch - optional iterable of workflow numbers removed again from the
#                 useInput selection.
#
# Returns None. An import failure is printed and the method returns early.
# Raises MatrixException when a step yields both a cfg and an input.
#
# NOTE(review): this listing lost its indentation and three lines (the call
# heads in front of the two argument lists in the refRel section, and the line
# before `copyStep=merge(...)`). Check the real file before changing logic.
131  def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
132 
133  prefix = self.filesPrefMap[fileNameIn]
134 
135  print("processing", fileNameIn)
136 
# __import__ returns the top-level package, so the module is fetched from sys.modules.
137  try:
138  _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
139  self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
140  except Exception as e:
141  print("ERROR importing file ", fileNameIn, str(e))
142  return
143 
144  if useInput is not None:
145  print("request for INPUT for ", useInput)
146 
147 
# fromInput maps workflow number (float) -> input level (int, 0 when not given).
148  fromInput={}
149 
150  if useInput:
151  for i in useInput:
152  if ':' in i:
153  (ik,il)=i.split(':')
154  if ik=='all':
155  for k in self.relvalModule.workflows.keys():
156  fromInput[float(k)]=int(il)
157  else:
158  fromInput[float(ik)]=int(il)
159  else:
160  if i=='all':
161  for k in self.relvalModule.workflows.keys():
162  fromInput[float(k)]=0
163  else:
164  fromInput[float(i)]=0
165 
# Workflows requested "from scratch" are taken out of the INPUT selection again.
166  if fromScratch:
167  fromScratch=map(float,fromScratch)
168  for num in fromScratch:
169  if num in fromInput:
170  fromInput.pop(num)
171  #overwrite steps
172  if self.overWrite:
173  for p in self.overWrite:
174  self.relvalModule.steps.overwrite(p)
175 
176  #change the origin of dataset on the fly
# A list length mismatch makes the method return silently.
# NOTE(review): the call that receives the two argument lists below is missing
# from this listing (presumably changeRefRelease - confirm).
177  if refRel:
178  if ',' in refRel:
179  refRels=refRel.split(',')
180  if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
181  return
183  self.relvalModule.steps,
184  list(zip(self.relvalModule.baseDataSetRelease,refRels))
185  )
186  else:
188  self.relvalModule.steps,
189  [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
190  )
191 
# Per workflow: wfInfo[0] is the name (or a list name, key[, suffix]),
# wfInfo[1] the step list, wfInfo.overrides the per-workflow overrides.
192  for num, wfInfo in self.relvalModule.workflows.items():
193  commands=[]
194  wfName = wfInfo[0]
195  stepList = wfInfo[1]
196  stepOverrides=wfInfo.overrides
197  # upgrade case: workflow has basic name, key[, suffix (only special workflows)]
198  wfKey = ""
199  wfSuffix = ""
200  if isinstance(wfName, list) and len(wfName)>1:
201  if len(wfName)>2: wfSuffix = wfName[2]
202  wfKey = wfName[1]
203  wfName = wfName[0]
204  # if no explicit name given for the workflow, use the name of step1
205  if wfName.strip() == '': wfName = stepList[0]
206  # option to specialize the wf as the third item in the WF list
# addCom (wfInfo[2]) holds extra dictionaries to merge into steps;
# addTo (wfInfo[3]) flags, per step index, where they apply (1 = apply).
207  addTo=None
208  addCom=None
209  if len(wfInfo)>=3:
210  addCom=wfInfo[2]
211  if not isinstance(addCom, list): addCom=[addCom]
212  #print 'added dict',addCom
213  if len(wfInfo)>=4:
214  addTo=wfInfo[3]
215  #pad with 0
216  while len(addTo)!=len(stepList):
217  addTo.append(0)
218 
219  name=wfName
220  # separate suffixes by + because show() excludes first part of name
221  if len(wfKey)>0:
222  name = name+'+'+wfKey
223  if len(wfSuffix)>0: name = name+wfSuffix
224  stepIndex=0
225  ranStepList=[]
226  name_for_workflow = name
227 
228  #first resolve INPUT possibilities
# Walk the steps from last to first; at the first index <= ilevel whose
# '<joined step names>INPUT' key exists in steps, substitute it and drop
# all earlier steps from stepList (stepList is modified in place).
229  if num in fromInput:
230  ilevel=fromInput[num]
231  #print num,ilevel
232  for (stepIr,step) in enumerate(reversed(stepList)):
233  stepName=step
234  stepI=(len(stepList)-stepIr)-1
235  #print stepIr,step,stepI,ilevel
236  if stepI>ilevel:
237  #print "ignoring"
238  continue
239  if stepI!=0:
240  testName='__'.join(stepList[0:stepI+1])+'INPUT'
241  else:
242  testName=step+'INPUT'
243  #print "JR",stepI,stepIr,testName,stepList
244  if testName in self.relvalModule.steps:
245  #print "JR",stepI,stepIr
246  stepList[stepI]=testName
247  #pop the rest in the list
248  #print "\tmod prepop",stepList
249  for p in range(stepI):
250  stepList.pop(0)
251  #print "\t\tmod",stepList
252  break
253 
# Build one command per remaining step; steps defined as None are skipped.
254  for (stepI,step) in enumerate(stepList):
255  stepName=step
256  if self.relvalModule.steps[stepName] is None:
257  continue
258  if self.wm:
259  #cannot put a certain number of things in wm
260  if stepName in ['SKIMD','SKIMCOSD','SKIMDreHLT']:
261  continue
262 
263  #replace stepName is needed
264  #if stepName in self.replaceStep
265  if len(name) > 0 : name += '+'
266  #any step can be mirrored with INPUT
267 
268  """
269  if num in fromInput:
270  if step+'INPUT' in self.relvalModule.steps.keys():
271  stepName = step+"INPUT"
272  stepList.remove(step)
273  stepList.insert(stepIndex,stepName)
274  """
# The workflow key and suffix are stripped from the step name before it is
# appended to the '+'-separated workflow name.
275  stepNameTmp = stepName
276  if len(wfKey)>0: stepNameTmp = stepNameTmp.replace('_'+wfKey,"")
277  if len(wfSuffix)>0: stepNameTmp = stepNameTmp.replace(wfSuffix,"")
278  name += stepNameTmp
279  if addCom and (not addTo or addTo[stepIndex]==1):
281  copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
282  cfg, input, opts = self.makeCmd(copyStep)
283  else:
284  cfg, input, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))
285 
286  if input and cfg :
287  msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
288  raise MatrixException(msg)
289 
# An input object is used as the command itself; otherwise a cmsDriver.py
# command line is assembled from cfg (or 'step<N>') and the options.
290  if input:
291  cmd = input
292  if self.noRun:
293  cmd.run=[]
294  else:
295  if cfg:
296  cmd = 'cmsDriver.py '+cfg+' '+opts
297  else:
298  cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
299  if self.wm:
300  cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
301  if self.addCommand:
302  if self.apply:
303  if stepIndex in self.apply or stepName in self.apply:
304  cmd +=' '+self.addCommand
305  else:
306  cmd +=' '+self.addCommand
307  if self.wm and self.revertDqmio=='yes':
308  cmd=cmd.replace('DQMIO','DQM')
309  cmd=cmd.replace('--filetype DQM','')
310  commands.append(cmd)
311  ranStepList.append(stepName)
312  stepIndex+=1
# name_for_workflow is the name before any step names were appended.
313  self.workFlowSteps[(num,prefix)] = (num, name_for_workflow, commands, ranStepList)
314 
315  return
316 
317 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write the commands of every requested matrix file to a text file.

    For each relval file that passes the `what` filter, the workflows
    are read with readMatrix() and dumped to 'cmsDriver_<name>_hlt.txt'
    in the current directory. The file holds one line per workflow and,
    unless `step1Only` is set, one 'STEP<n>' line per distinct
    follow-up command.

    useInput, refRel, fromScratch -- forwarded unchanged to readMatrix().
    what       -- selection string handed to reset() for every file.
    step1Only  -- only list first steps; INPUT-based workflows are skipped.
    selected   -- optional iterable of workflow numbers to restrict to.

    Exceptions from readMatrix() are printed and re-raised.
    """
    if selected:
        # map() is a one-shot iterator on Python 3: the first membership
        # test would exhaust it and hide every later workflow. A list
        # can be tested repeatedly (showWorkFlows does the same).
        selected = list(map(float, selected))

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
            print("ignoring non-requested file",matrixFile)
            continue

        if self.what == 'all' and not self.filesDefault[matrixFile]:
            print("ignoring file not used by default (enable with -w)",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file:", matrixFile, str(e))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'

        # The context manager closes the file on every exit path,
        # including the step1Only `continue` and any exception below.
        with open(dataFileName,'w') as outFile:

            print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
            ids = sorted(self.workFlowSteps.keys())
            # indexAndSteps[i] collects the distinct (stepName, command)
            # pairs seen at step position i (position 0 stays empty).
            indexAndSteps=[]

            writtenWF=0
            for key in ids:
                if selected and key[0] not in selected:
                    continue
                #trick to skip the HImix IB test
                if key[0] in (203.1, 204.1, 205.1, 4.51, 4.52):
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]
                wfName,stepNames= name.split('+',1)

                stepNames=stepNames.replace('+SKIMCOSD','')
                stepNames=stepNames.replace('+SKIMD','')
                if 'HARVEST' in stepNames:
                    #find out automatically what to remove
                    exactb=stepNames.index('+HARVEST')
                    exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
                    stepNames=stepNames.replace(stepNames[exactb:exacte],'')
                otherSteps = None
                if '+' in stepNames:
                    step1,otherSteps = stepNames.split('+',1)

                line = str(num) + ' ++ '+ wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' +otherSteps.replace('+',',')
                else:
                    line += ' ++ none'

                # A non-string first command is an input description.
                inputInfo=None
                if not isinstance(commands[0],str):
                    inputInfo=commands[0]

                if otherSteps:
                    for (i,c) in enumerate(otherSteps.split('+')):
                        #pad with set
                        while len(indexAndSteps) < i+2:
                            indexAndSteps.append(set())
                        indexAndSteps[i+1].add((c,commands[i+1]))

                if inputInfo :
                    #skip the samples from INPUT when step1Only is on
                    if step1Only: continue
                    line += ' ++ REALDATA: '+inputInfo.dataSet
                    if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
                    line += ', FILES: ' +str(inputInfo.files)
                    line += ', EVENTS: '+str(inputInfo.events)
                    if inputInfo.label!='':
                        line += ', LABEL: ' +inputInfo.label
                    line += ', LOCATION:'+inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ '+commands[0]
                # NOTE(review): applied to every line; the listing lost
                # the indentation, so confirm this is not `else`-only.
                if self.revertDqmio=='yes':
                    line=line.replace('DQMIO','DQM')
                writtenWF+=1
                outFile.write(line+'\n')

            outFile.write('\n'+'\n')
            # As before, step1Only skips the STEP lines and the summary.
            if step1Only: continue

            for (index,s) in enumerate(indexAndSteps):
                for (stepName,cmd) in s:
                    stepIndex=index+1
                    if 'dasquery.log' in cmd: continue
                    line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
                    if self.revertDqmio=='yes':
                        line=line.replace('DQMIO','DQM')
                    outFile.write(line+'\n')
            outFile.write('\n'+'\n')

        print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', dataFileName)
    return
418 
def workFlowsByLocation(self, cafVeto=True):
    """Return the workflows that can be run at the current location.

    A workflow is dropped only when `cafVeto` is true, its first
    command is an InputInfo located at 'CAF', and the process is not
    running on CAF. Being on CAF is detected by 'cms/caf/cms' appearing
    in the CMS_PATH environment variable.
    """
    # An unset CMS_PATH means "not on CAF" rather than a KeyError.
    onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

    workflows = []
    for workflow in self.workFlows:
        firstCmd = workflow.cmds[0]
        cafOnly = isinstance(firstCmd, InputInfo) and firstCmd.location == 'CAF'
        if cafVeto and cafOnly and not onCAF:
            continue
        workflows.append(workflow)

    return workflows
433 
# showWorkFlows: print the workflows returned by workFlowsByLocation(cafVeto),
# optionally restricted to `selected`, followed by a count of workflows per
# number of steps.
#   selected - optional iterable of workflow numbers; converted to a list of floats.
#   extended - True: one line per command of each workflow;
#              False: a single line (id and step names) per workflow.
#   cafVeto  - forwarded to workFlowsByLocation().
# Returns None; all output goes to stdout.
# NOTE(review): indentation is lost in this listing, so it cannot be seen
# whether the second maxLen/fmt1/fmt2 assignment sits inside `if selected:`;
# whitespace inside the format strings may also have been collapsed.
434  def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
435  if selected: selected = list(map(float,selected))
436  wfs = self.workFlowsByLocation(cafVeto)
437  maxLen = 100 # for summary, limit width of output
438  fmt1 = "%-6s %-35s [1]: %s ..."
439  fmt2 = " %35s [%d]: %s ..."
440  print("\nfound a total of ", len(wfs), ' workflows:')
441  if selected:
442  print(" of which the following", len(selected), 'were selected:')
443  #-ap for now:
# With maxLen == -1 the slice [:maxLen] drops only the extra blank appended to each command.
444  maxLen = -1 # for individual listing, no limit on width
445  fmt1 = "%-6s %-35s [1]: %s "
446  fmt2 = " %35s [%d]: %s"
447 
# N[k] counts the listed workflows that have k+1 commands.
448  N=[]
449  for wf in wfs:
450  if selected and float(wf.numId) not in selected: continue
451  if extended: print('')
452  #pad with zeros
453  for i in range(len(N),len(wf.cmds)): N.append(0)
454  N[len(wf.cmds)-1]+=1
455  wfName = wf.nameId
456  stepNames = '+'.join(wf.stepList)
457  for i,s in enumerate(wf.cmds):
458  if extended:
459  if i==0:
460  print(fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen]))
461  else:
462  print(fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen]))
463  else:
# Short form: one line per workflow, then stop iterating over its commands.
464  print("%-6s %-35s "% (wf.numId, stepNames))
465  break
466  print('')
467  for i,n in enumerate(N):
468  if n: print(n,'workflows with',i+1,'steps')
469 
470  return
471 
# createWorkFlows: turn the entries of self.workFlowSteps that belong to
# `fileNameIn` (matched through its prefix in self.filesPrefMap) into WorkFlow
# objects appended to self.workFlows, in ascending workflow-number order.
# A repeated '<num>_<name>' identifier is reported on stdout; otherwise the
# entry is recorded in self.nameList.
# Returns None. Raises KeyError when fileNameIn is not in self.filesPrefMap.
# NOTE(review): indentation is lost in this listing, so it cannot be seen
# whether the WorkFlow append happens for duplicates too or only in the
# `else` branch - confirm against the real file.
472  def createWorkFlows(self, fileNameIn):
473 
474  prefixIn = self.filesPrefMap[fileNameIn]
475 
476  # get through the list of items and update the requested workflows only
# Keys of workFlowSteps are (workflow number, prefix) pairs.
477  keyList = self.workFlowSteps.keys()
478  ids = []
479  for item in keyList:
480  id, pref = item
481  if pref != prefixIn : continue
482  ids.append(id)
483  ids.sort()
484  for key in ids:
485  val = self.workFlowSteps[(key,prefixIn)]
486  num, name, commands, stepList = val
487  nameId = str(num)+'_'+name
488  if nameId in self.nameList:
489  print("==> duplicate name found for ", nameId)
490  print(' keeping : ', self.nameList[nameId])
491  print(' ignoring : ', val)
492  else:
493  self.nameList[nameId] = val
494 
495  self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))
496 
497  return
498 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    With self.what == 'all' the files flagged False in
    self.filesDefault are skipped. Otherwise a file is kept only when
    '_<token>' occurs in its name for one of the comma-separated tokens
    of self.what. The arguments are forwarded to readMatrix().
    Exceptions from readMatrix() or createWorkFlows() are printed and
    re-raised.
    """
    for relvalFile in self.files:
        if self.what == 'all':
            if not self.filesDefault[relvalFile]:
                print("ignoring",relvalFile,"from default matrix")
                continue
        else:
            requested = self.what.split(",")
            if not any('_'+token in relvalFile for token in requested):
                print("ignoring non-requested file",relvalFile)
                continue

        try:
            self.readMatrix(relvalFile, useInput, refRel, fromScratch)
        except Exception as err:
            print("ERROR reading file:", relvalFile, str(err))
            raise

        try:
            self.createWorkFlows(relvalFile)
        except Exception as err:
            print("ERROR creating workflows :", str(err))
            raise
520 
521 
def show(self, selected=None, extended=True, cafVeto=True):
    """Print the workflow listing followed by an 80-dash separator.

    All three arguments are passed straight to showWorkFlows().
    """
    self.showWorkFlows(selected, extended, cafVeto)
    separator = '-' * 80
    print('\n', separator, '\n')
526 
527 
def updateDB(self):
    """Pickle self.workFlows into 'theMatrix.pkl' in the current directory.

    Returns None. An existing file of that name is overwritten.
    """
    import pickle

    # pickle writes bytes, so the file must be opened in binary mode
    # (text mode raises TypeError on Python 3). The context manager
    # makes sure the data is flushed and the handle closed.
    with open('theMatrix.pkl', 'wb') as dbFile:
        pickle.dump(self.workFlows, dbFile)

    return
534 
Definition: merge.py:1
revertDqmio
maybe we want input two levels deep
Definition: MatrixReader.py:24
def prepare(self, useInput=None, refRel='', fromScratch=None)
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const &tracks, int32_t i)
Definition: TracksSoA.h:90
def makeCmd(self, step)
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None)
def makeStep(self, step, overrides)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def showWorkFlows(self, selected=None, extended=True, cafVeto=True)
def show(self, selected=None, extended=True, cafVeto=True)
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None)
def reset(self, what='all')
Definition: MatrixReader.py:33
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def createWorkFlows(self, fileNameIn)
def changeRefRelease(steps, listOfPairs)
Definition: MatrixUtil.py:250
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def __init__(self, opt)
Definition: MatrixReader.py:19
def workFlowsByLocation(self, cafVeto=True)
#define str(s)