CMS 3D CMS Logo

MatrixReader.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import sys, os
3 
4 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
5 from Configuration.PyReleaseValidation.MatrixUtil import InputInfo
6 
7 # ================================================================================
8 
class MatrixException(Exception):
    """Error raised when a workflow definition is inconsistent.

    The message given at construction is stored on ``msg`` and is the
    text returned by ``str()``.
    """

    def __init__(self, msg):
        # Forward the message to Exception so ``args`` is populated too.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
14 
15 # ================================================================================
16 
18 
def __init__(self, opt):
    """Set up the reader from an options object.

    ``opt`` must expose the attributes ``what``, ``wmcontrol``,
    ``revertDqmio``, ``command``, ``apply``, ``workflow``, ``overWrite``
    and ``noRun``. Each one is copied onto the instance under the
    attribute name used by the other methods.
    """
    # reset() stores opt.what and creates the bookkeeping containers.
    self.reset(opt.what)

    self.wm = opt.wmcontrol
    self.revertDqmio = opt.revertDqmio
    self.addCommand = opt.command
    self.apply = opt.apply
    self.commandLineWf = opt.workflow
    self.overWrite = opt.overWrite

    self.noRun = opt.noRun
32 
def reset(self, what='all'):
    """Clear all per-run state and rebuild the matrix-file tables.

    ``what`` is stored as ``self.what``. The workflow containers are
    emptied, the matrix-file tables are rebuilt, and ``relvalModule``
    is set to None.
    """
    self.what = what

    # a bunch of information, but not yet the WorkFlow object
    self.workFlowSteps = {}
    # the actual WorkFlow objects
    self.workFlows = []
    self.nameList = {}

    # One row per matrix file: (module name, workflow prefix, default flag).
    # filesDefault is consulted by prepare()/showRaw() when what == 'all'.
    # The three attributes below are derived from this single table so
    # they can never get out of sync.
    matrixFiles = (
        ('relval_standard', 'std-', True),
        ('relval_highstats', 'hi-', True),
        ('relval_pileup', 'PU-', True),
        ('relval_generator', 'gen-', True),
        ('relval_extendedgen', 'genExt-', True),
        ('relval_production', 'prod-', True),
        ('relval_ged', 'ged-', True),
        ('relval_upgrade', 'upg-', False),
        ('relval_cleanedupgrade', 'clnupg-', False),
        ('relval_gpu', 'gpu-', False),
        ('relval_2017', '2017-', True),
        ('relval_2026', '2026-', True),
        ('relval_identity', 'id-', False),
        ('relval_machine', 'mach-', True),
        ('relval_premix', 'premix-', True),
        ('relval_nano', 'nano-', True),
    )

    self.filesPrefMap = {name: prefix for name, prefix, _ in matrixFiles}
    self.files = [name for name, _, _ in matrixFiles]
    self.filesDefault = {name: default for name, _, default in matrixFiles}

    self.relvalModule = None
99 
def makeCmd(self, step):
    """Split a step dictionary into cfg, input and an option string.

    ``step`` maps string keys to values. Keys containing ``no_exec`` are
    dropped. The values stored under ``cfg`` and ``input`` (compared
    case-insensitively) are returned separately. Every other pair is
    appended to the option string as ``' <key> <value>'``.

    Returns a tuple ``(cfg, input, options)``; ``cfg`` and ``input`` are
    None when the step has no such key.
    """
    cfg = None
    inputInfo = None
    options = []
    for key, value in step.items():
        if 'no_exec' in key:
            continue  # we want to really run it ...
        lowered = key.lower()
        if lowered == 'cfg':
            cfg = value  # do not append to the options, return separately
        elif lowered == 'input':
            inputInfo = value  # do not append to the options, return separately
        else:
            options.append(' ' + key + ' ' + str(value))
    return cfg, inputInfo, ''.join(options)
119 
120  def makeStep(self,step,overrides):
122  if len(overrides) > 0:
123  copyStep=merge([overrides]+[step])
124  return copyStep
125  else:
126  return step
127 
128  def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
129 
130  prefix = self.filesPrefMap[fileNameIn]
131 
132  print("processing", fileNameIn)
133 
134  try:
135  _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
136  self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
137  except Exception as e:
138  print("ERROR importing file ", fileNameIn, str(e))
139  return
140 
141  if useInput is not None:
142  print("request for INPUT for ", useInput)
143 
144 
145  fromInput={}
146 
147  if useInput:
148  for i in useInput:
149  if ':' in i:
150  (ik,il)=i.split(':')
151  if ik=='all':
152  for k in self.relvalModule.workflows.keys():
153  fromInput[float(k)]=int(il)
154  else:
155  fromInput[float(ik)]=int(il)
156  else:
157  if i=='all':
158  for k in self.relvalModule.workflows.keys():
159  fromInput[float(k)]=0
160  else:
161  fromInput[float(i)]=0
162 
163  if fromScratch:
164  fromScratch=map(float,fromScratch)
165  for num in fromScratch:
166  if num in fromInput:
167  fromInput.pop(num)
168  #overwrite steps
169  if self.overWrite:
170  for p in self.overWrite:
171  self.relvalModule.steps.overwrite(p)
172 
173  #change the origin of dataset on the fly
174  if refRel:
175  if ',' in refRel:
176  refRels=refRel.split(',')
177  if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
178  return
180  self.relvalModule.steps,
181  list(zip(self.relvalModule.baseDataSetRelease,refRels))
182  )
183  else:
185  self.relvalModule.steps,
186  [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
187  )
188 
189 
190  for num, wfInfo in self.relvalModule.workflows.items():
191  commands=[]
192  wfName = wfInfo[0]
193  stepList = wfInfo[1]
194  stepOverrides=wfInfo.overrides
195  # upgrade case: workflow has basic name, key[, suffix (only special workflows)]
196  wfKey = ""
197  wfSuffix = ""
198  if isinstance(wfName, list) and len(wfName)>1:
199  if len(wfName)>2: wfSuffix = wfName[2]
200  wfKey = wfName[1]
201  wfName = wfName[0]
202  # if no explicit name given for the workflow, use the name of step1
203  if wfName.strip() == '': wfName = stepList[0]
204  # option to specialize the wf as the third item in the WF list
205  addTo=None
206  addCom=None
207  if len(wfInfo)>=3:
208  addCom=wfInfo[2]
209  if not isinstance(addCom, list): addCom=[addCom]
210  #print 'added dict',addCom
211  if len(wfInfo)>=4:
212  addTo=wfInfo[3]
213  #pad with 0
214  while len(addTo)!=len(stepList):
215  addTo.append(0)
216 
217  name=wfName
218  # separate suffixes by + because show() excludes first part of name
219  if len(wfKey)>0:
220  name = name+'+'+wfKey
221  if len(wfSuffix)>0: name = name+wfSuffix
222  stepIndex=0
223  ranStepList=[]
224  name_for_workflow = name
225 
226  #first resolve INPUT possibilities
227  if num in fromInput:
228  ilevel=fromInput[num]
229  #print num,ilevel
230  for (stepIr,step) in enumerate(reversed(stepList)):
231  stepName=step
232  stepI=(len(stepList)-stepIr)-1
233  #print stepIr,step,stepI,ilevel
234  if stepI>ilevel:
235  #print "ignoring"
236  continue
237  if stepI!=0:
238  testName='__'.join(stepList[0:stepI+1])+'INPUT'
239  else:
240  testName=step+'INPUT'
241  #print "JR",stepI,stepIr,testName,stepList
242  if testName in self.relvalModule.steps:
243  #print "JR",stepI,stepIr
244  stepList[stepI]=testName
245  #pop the rest in the list
246  #print "\tmod prepop",stepList
247  for p in range(stepI):
248  stepList.pop(0)
249  #print "\t\tmod",stepList
250  break
251 
252 
253  for (stepI,step) in enumerate(stepList):
254  stepName=step
255  if self.relvalModule.steps[stepName] is None:
256  continue
257  if self.wm:
258  #cannot put a certain number of things in wm
259  if stepName in ['SKIMD','SKIMCOSD','SKIMDreHLT']:
260  continue
261 
262  #replace stepName is needed
263  #if stepName in self.replaceStep
264  if len(name) > 0 : name += '+'
265  #any step can be mirrored with INPUT
266 
267  """
268  if num in fromInput:
269  if step+'INPUT' in self.relvalModule.steps.keys():
270  stepName = step+"INPUT"
271  stepList.remove(step)
272  stepList.insert(stepIndex,stepName)
273  """
274  stepNameTmp = stepName
275  if len(wfKey)>0: stepNameTmp = stepNameTmp.replace('_'+wfKey,"")
276  if len(wfSuffix)>0: stepNameTmp = stepNameTmp.replace(wfSuffix,"")
277  name += stepNameTmp
278  if addCom and (not addTo or addTo[stepIndex]==1):
280  copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
281  cfg, input, opts = self.makeCmd(copyStep)
282  else:
283  cfg, input, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))
284 
285  if input and cfg :
286  msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
287  raise MatrixException(msg)
288 
289  if input:
290  cmd = input
291  if self.noRun:
292  cmd.run=[]
293  else:
294  if cfg:
295  cmd = 'cmsDriver.py '+cfg+' '+opts
296  else:
297  cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
298  if self.wm:
299  cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
300  if self.addCommand:
301  if self.apply:
302  if stepIndex in self.apply or stepName in self.apply:
303  cmd +=' '+self.addCommand
304  else:
305  cmd +=' '+self.addCommand
306  if self.wm and self.revertDqmio=='yes':
307  cmd=cmd.replace('DQMIO','DQM')
308  cmd=cmd.replace('--filetype DQM','')
309  commands.append(cmd)
310  ranStepList.append(stepName)
311  stepIndex+=1
312  self.workFlowSteps[(num,prefix)] = (num, name_for_workflow, commands, ranStepList)
313 
314  return
315 
316 
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
    """Write a ``cmsDriver_<file>_hlt.txt`` summary for each matrix file.

    For every entry of ``self.files`` the state is reset with ``what``
    and the file is read through ``readMatrix(matrixFile, useInput,
    refRel, fromScratch)``. Files not requested by ``what``, or not
    enabled by default when ``what == 'all'``, are skipped. One line per
    workflow is written to a text file in the current directory. Unless
    ``step1Only`` is set, one line per later step follows.

    ``selected`` optionally restricts the output to the given workflow
    numbers; its entries are converted with ``float``. Errors raised by
    ``readMatrix`` are reported and re-raised.
    """
    if selected:
        # Materialise the conversion: a bare map() is a one-shot iterator
        # on Python 3, so only the first membership test could succeed.
        selected = set(map(float, selected))

    # workflow numbers skipped on purpose (trick to skip the HImix IB test)
    skipped = (203.1, 204.1, 205.1, 4.51, 4.52)

    for matrixFile in self.files:

        self.reset(what)

        if self.what != 'all' and not any('_'+el in matrixFile for el in self.what.split(",")):
            print("ignoring non-requested file",matrixFile)
            continue

        if self.what == 'all' and not self.filesDefault[matrixFile]:
            print("ignoring file not used by default (enable with -w)",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file:", matrixFile, str(e))
            raise

        if not self.workFlowSteps:
            continue

        dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
        # The with-block closes the file on every exit path, including
        # the step1Only 'continue' below and any exception.
        with open(dataFileName, 'w') as outFile:

            print("found ", len(self.workFlowSteps), ' workflows for ', dataFileName)
            ids = sorted(self.workFlowSteps.keys())
            # indexAndSteps[i] collects the distinct (step name, command)
            # pairs seen at step position i; position 0 stays unused.
            indexAndSteps = []

            writtenWF = 0
            for key in ids:
                if selected and key[0] not in selected:
                    continue
                if key[0] in skipped:
                    continue
                num, name, commands, stepList = self.workFlowSteps[key]
                # NOTE(review): this expects 'name' to look like
                # "<wfName>+<step1>+<step2>..."; a name without '+' makes
                # the unpacking raise ValueError -- confirm that what
                # readMatrix stores still has this shape.
                wfName, stepNames = name.split('+', 1)

                stepNames = stepNames.replace('+SKIMCOSD', '')
                stepNames = stepNames.replace('+SKIMD', '')
                if 'HARVEST' in stepNames:
                    # find out automatically what to remove
                    exactb = stepNames.index('+HARVEST')
                    if '+' in stepNames[exactb+1:]:
                        exacte = stepNames.index('+', exactb+1)
                    else:
                        exacte = len(stepNames)
                    stepNames = stepNames.replace(stepNames[exactb:exacte], '')
                otherSteps = None
                if '+' in stepNames:
                    otherSteps = stepNames.split('+', 1)[1]

                line = str(num) + ' ++ ' + wfName
                if otherSteps and not step1Only:
                    line += ' ++ ' + otherSteps.replace('+', ',')
                else:
                    line += ' ++ none'
                # a non-string first command is an input description
                inputInfo = None
                if not isinstance(commands[0], str):
                    inputInfo = commands[0]
                if otherSteps:
                    for (i, c) in enumerate(otherSteps.split('+')):
                        # pad with set
                        while len(indexAndSteps) < i+2:
                            indexAndSteps.append(set())
                        indexAndSteps[i+1].add((c, commands[i+1]))

                if inputInfo:
                    # skip the samples from INPUT when step1Only is on
                    if step1Only:
                        continue
                    line += ' ++ REALDATA: '+inputInfo.dataSet
                    if inputInfo.run != []:
                        line += ', RUN:'+'|'.join(map(str, inputInfo.run))
                    line += ', FILES: ' + str(inputInfo.files)
                    line += ', EVENTS: '+str(inputInfo.events)
                    if inputInfo.label != '':
                        line += ', LABEL: ' + inputInfo.label
                    line += ', LOCATION:'+inputInfo.location
                    line += ' @@@'
                else:
                    line += ' @@@ '+commands[0]
                if self.revertDqmio == 'yes':
                    line = line.replace('DQMIO', 'DQM')
                writtenWF += 1
                outFile.write(line+'\n')

            outFile.write('\n'+'\n')
            if step1Only:
                continue

            for (index, s) in enumerate(indexAndSteps):
                for (stepName, cmd) in s:
                    stepIndex = index+1
                    if 'dasquery.log' in cmd:
                        continue
                    line = 'STEP%d ++ ' % (stepIndex,) + stepName + ' @@@ ' + cmd
                    if self.revertDqmio == 'yes':
                        line = line.replace('DQMIO', 'DQM')
                    outFile.write(line+'\n')
            outFile.write('\n'+'\n')

        print("wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name)
    return
417 
def workFlowsByLocation(self, cafVeto=True):
    """Return the workflows that can be used at the current location.

    A workflow whose first command is an ``InputInfo`` with location
    ``'CAF'`` is left out when ``cafVeto`` is true and the ``CMS_PATH``
    environment variable does not contain ``cms/caf/cms``. All other
    workflows are returned, in their original order.
    """
    # Check if we are on CAF; an unset CMS_PATH counts as "not on CAF"
    # instead of raising KeyError.
    onCAF = 'cms/caf/cms' in os.environ.get('CMS_PATH', '')

    workflows = []
    for workflow in self.workFlows:
        firstCmd = workflow.cmds[0]
        if isinstance(firstCmd, InputInfo):
            if cafVeto and (firstCmd.location == 'CAF' and not onCAF):
                continue
        workflows.append(workflow)

    return workflows
432 
def showWorkFlows(self, selected=None, extended=True, cafVeto=True):
    """Print a listing of the workflows returned by workFlowsByLocation.

    ``selected`` optionally restricts the listing to the given workflow
    numbers (converted with ``float``). With ``extended`` every command
    of a workflow gets its own line; otherwise only one line with the
    number and the step names is printed. ``cafVeto`` is passed on to
    ``workFlowsByLocation``. A count of workflows per number of steps is
    printed at the end.
    """
    if selected:
        selected = list(map(float, selected))
    wfs = self.workFlowsByLocation(cafVeto)
    maxLen = 100  # for summary, limit width of output
    fmt1 = "%-6s %-35s [1]: %s ..."
    fmt2 = " %35s [%d]: %s ..."
    print("\nfound a total of ", len(wfs), ' workflows:')
    if selected:
        print(" of which the following", len(selected), 'were selected:')
        # for individual listing, no limit on width: slicing with -1 only
        # removes the blank appended to each command below
        maxLen = -1
        fmt1 = "%-6s %-35s [1]: %s "
        fmt2 = " %35s [%d]: %s"

    # N[k] counts the listed workflows that have k+1 commands
    N = []
    for wf in wfs:
        if selected and float(wf.numId) not in selected:
            continue
        if extended:
            print('')
        # pad with zeros
        N.extend([0] * (len(wf.cmds) - len(N)))
        N[len(wf.cmds)-1] += 1
        stepNames = '+'.join(wf.stepList)
        for i, s in enumerate(wf.cmds):
            if not extended:
                print("%-6s %-35s "% (wf.numId, stepNames))
                break
            shown = (str(s)+' ')[:maxLen]
            if i == 0:
                print(fmt1 % (wf.numId, stepNames, shown))
            else:
                print(fmt2 % (' ', i+1, shown))
    print('')
    for i, n in enumerate(N):
        if n:
            print(n,'workflows with',i+1,'steps')
470 
def createWorkFlows(self, fileNameIn):
    """Create WorkFlow objects for the workflows read from one matrix file.

    Only entries of ``self.workFlowSteps`` whose key carries the prefix
    mapped to ``fileNameIn`` in ``self.filesPrefMap`` are used. They are
    processed in ascending order of workflow number, and a ``WorkFlow``
    is appended to ``self.workFlows`` for each of them. The first entry
    seen for a given ``<num>_<name>`` is remembered in ``self.nameList``;
    later ones are reported as duplicates.
    """
    prefixIn = self.filesPrefMap[fileNameIn]

    # get through the list of items and update the requested workflows only
    ids = sorted(num for (num, pref) in self.workFlowSteps if pref == prefixIn)
    for key in ids:
        val = self.workFlowSteps[(key, prefixIn)]
        num, name, commands, stepList = val
        nameId = str(num)+'_'+name
        if nameId in self.nameList:
            print("==> duplicate name found for ", nameId)
            print(' keeping : ', self.nameList[nameId])
            print(' ignoring : ', val)
        else:
            self.nameList[nameId] = val

        # NOTE(review): the WorkFlow is appended even when the name was
        # reported as a duplicate -- confirm this is intended.
        self.workFlows.append(WorkFlow(num, name, commands=commands, stepList=stepList))
497 
def prepare(self, useInput=None, refRel='', fromScratch=None):
    """Read every requested matrix file and create its workflows.

    A file from ``self.files`` is processed when ``self.what`` is
    ``'all'`` and the file is enabled in ``self.filesDefault``, or when
    one of the comma-separated entries of ``self.what``, prefixed with
    ``'_'``, occurs in the file name. ``useInput``, ``refRel`` and
    ``fromScratch`` are passed through to ``readMatrix``. Errors from
    reading or from workflow creation are reported and re-raised.
    """
    for matrixFile in self.files:
        if self.what == 'all':
            if not self.filesDefault[matrixFile]:
                print("ignoring",matrixFile,"from default matrix")
                continue
        elif not any('_'+el in matrixFile for el in self.what.split(",")):
            print("ignoring non-requested file",matrixFile)
            continue

        try:
            self.readMatrix(matrixFile, useInput, refRel, fromScratch)
        except Exception as e:
            print("ERROR reading file:", matrixFile, str(e))
            raise

        try:
            self.createWorkFlows(matrixFile)
        except Exception as e:
            print("ERROR creating workflows :", str(e))
            raise
519 
520 
def show(self, selected=None, extended=True, cafVeto=True):
    """Print the workflow listing followed by a separator line.

    All three arguments are passed unchanged to ``showWorkFlows``.
    """
    self.showWorkFlows(selected, extended, cafVeto)
    print('\n','-'*80,'\n')
525 
526 
def updateDB(self):
    """Pickle ``self.workFlows`` into ``theMatrix.pkl`` in the current directory."""
    import pickle

    # pickle writes bytes, so the file must be opened in binary mode;
    # the with-block makes sure the data is flushed and the file closed.
    with open('theMatrix.pkl', 'wb') as dbFile:
        pickle.dump(self.workFlows, dbFile)
533 
Definition: merge.py:1
revertDqmio
maybe we want two-level-deep input
Definition: MatrixReader.py:24
def prepare(self, useInput=None, refRel='', fromScratch=None)
bool any(const std::vector< T > &v, const T &what)
Definition: ECalSD.cc:37
def makeCmd(self, step)
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def makeStep(self, step, overrides)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def showWorkFlows(self, selected=None, extended=True, cafVeto=True)
def show(self, selected=None, extended=True, cafVeto=True)
def showRaw(self, useInput, refRel=None, fromScratch=None, what='all', step1Only=False, selected=None)
def reset(self, what='all')
Definition: MatrixReader.py:33
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def createWorkFlows(self, fileNameIn)
def changeRefRelease(steps, listOfPairs)
Definition: MatrixUtil.py:240
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def __init__(self, opt)
Definition: MatrixReader.py:19
def workFlowsByLocation(self, cafVeto=True)
#define str(s)