CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_0/src/Configuration/PyReleaseValidation/python/MatrixReader.py

Go to the documentation of this file.
00001 
00002 import sys
00003 
00004 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
00005 
00006 # ================================================================================
00007 
class MatrixException(Exception):
    """Error raised when a workflow definition in a relval matrix is inconsistent.

    The message is available as the ``msg`` attribute and is what ``str()``
    returns.
    """

    def __init__(self, msg):
        # Pass the message on to Exception as well: otherwise ``args`` stays
        # empty, repr() shows no message and the exception cannot be unpickled.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
00013         
00014 # ================================================================================
00015 
00016 class MatrixReader(object):
00017 
00018     def __init__(self, opt):
00019 
00020         self.reset(opt.what)
00021 
00022         self.wm=opt.wmcontrol
00023         self.addCommand=opt.command
00024         self.commandLineWf=opt.workflow
00025         self.overWrite=opt.overWrite
00026         
00027         return
00028 
00029     def reset(self, what='all'):
00030 
00031         self.what = what
00032 
00033         #a bunch of information, but not yet the WorkFlow object
00034         self.workFlowSteps = {}
00035         #the actual WorkFlow objects
00036         self.workFlows = []
00037         self.nameList  = {}
00038         
00039         self.filesPrefMap = {'relval_standard' : 'std-' ,
00040                              'relval_highstats': 'hi-'  ,
00041                              'relval_pileup': 'PU-'  ,
00042                              'relval_generator': 'gen-'  ,
00043                              'relval_production': 'prod-'  ,
00044                              'relval_ged': 'ged-'
00045                              }
00046 
00047         self.files = ['relval_standard' ,
00048                       'relval_highstats',
00049                       'relval_pileup',
00050                       'relval_generator',
00051                       'relval_production',
00052                       'relval_ged'
00053                       ]
00054 
00055         self.relvalModule = None
00056         
00057         return
00058 
00059     def makeCmd(self, step):
00060 
00061         cmd = ''
00062         cfg = None
00063         input = None
00064         for k,v in step.items():
00065             if 'no_exec' in k : continue  # we want to really run it ...
00066             if k.lower() == 'cfg':
00067                 cfg = v
00068                 continue # do not append to cmd, return separately
00069             if k.lower() == 'input':
00070                 input = v 
00071                 continue # do not append to cmd, return separately
00072             
00073             #chain the configs
00074             #if k.lower() == '--python':
00075             #    v = 'step%d_%s'%(index,v)
00076             cmd += ' ' + k + ' ' + str(v)
00077         return cfg, input, cmd
00078     
00079     def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
00080         
00081         prefix = self.filesPrefMap[fileNameIn]
00082         
00083         print "processing ", fileNameIn
00084         
00085         try:
00086             _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
00087             self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
00088         except Exception, e:
00089             print "ERROR importing file ", fileNameIn, str(e)
00090             return
00091 
00092         print "request for INPUT for ", useInput
00093 
00094         
00095         fromInput={}
00096         
00097         if useInput:
00098             for i in useInput:
00099                 if ':' in i:
00100                     (ik,il)=i.split(':')
00101                     if ik=='all':
00102                         for k in self.relvalModule.workflows.keys():
00103                             fromInput[float(k)]=int(il)
00104                     else:
00105                         fromInput[float(ik)]=int(il)
00106                 else:
00107                     if i=='all':
00108                         for k in self.relvalModule.workflows.keys():
00109                             fromInput[float(k)]=0
00110                     else:
00111                         fromInput[float(i)]=0
00112                 
00113         if fromScratch:
00114             fromScratch=map(float,fromScratch)
00115             for num in fromScratch:
00116                 if num in fromInput:
00117                     fromInput.pop(num)
00118         #overwrite steps
00119         if self.overWrite:
00120             for p in self.overWrite:
00121                 self.relvalModule.steps.overwrite(p)
00122         
00123         #change the origin of dataset on the fly
00124         if refRel:
00125             self.relvalModule.changeRefRelease(
00126                 self.relvalModule.steps,
00127                 [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
00128                 )
00129             
00130 
00131         for num, wfInfo in self.relvalModule.workflows.items():
00132             commands=[]
00133             wfName = wfInfo[0]
00134             stepList = wfInfo[1]
00135             # if no explicit name given for the workflow, use the name of step1
00136             if wfName.strip() == '': wfName = stepList[0]
00137             # option to specialize the wf as the third item in the WF list
00138             addTo=None
00139             addCom=None
00140             if len(wfInfo)>=3:
00141                 addCom=wfInfo[2]
00142                 if not type(addCom)==list:   addCom=[addCom]
00143                 #print 'added dict',addCom
00144                 if len(wfInfo)>=4:
00145                     addTo=wfInfo[3]
00146                     #pad with 0
00147                     while len(addTo)!=len(stepList):
00148                         addTo.append(0)
00149 
00150             name=wfName
00151             stepIndex=0
00152             ranStepList=[]
00153 
00154             #first resolve INPUT possibilities
00155             if num in fromInput:
00156                 ilevel=fromInput[num]
00157                 #print num,ilevel
00158                 for (stepIr,step) in enumerate(reversed(stepList)):
00159                     stepName=step
00160                     stepI=(len(stepList)-stepIr)-1
00161                     #print stepIr,step,stepI,ilevel                    
00162                     if stepI>ilevel:
00163                         #print "ignoring"
00164                         continue
00165                     if stepI!=0:
00166                         testName='__'.join(stepList[0:stepI+1])+'INPUT'
00167                     else:
00168                         testName=step+'INPUT'
00169                     #print "JR",stepI,stepIr,testName,stepList
00170                     if testName in self.relvalModule.steps.keys():
00171                         #print "JR",stepI,stepIr
00172                         stepList[stepI]=testName
00173                         #pop the rest in the list
00174                         #print "\tmod prepop",stepList
00175                         for p in range(stepI):
00176                             stepList.pop(0)
00177                         #print "\t\tmod",stepList
00178                         break
00179                                                         
00180                                                     
00181             for (stepI,step) in enumerate(stepList):
00182                 stepName=step
00183                 if self.wm:
00184                     #cannot put a certain number of things in wm
00185                     if stepName in ['SKIMD','HARVESTD','HARVEST','HARVESTD','RECODFROMRAWRECO']:
00186                         continue
00187                 #replace stepName is needed
00188                 #if stepName in self.replaceStep
00189                 if len(name) > 0 : name += '+'
00190                 #any step can be mirrored with INPUT
00191                 ## maybe we want too level deep input
00192                 """
00193                 if num in fromInput:
00194                     if step+'INPUT' in self.relvalModule.steps.keys():
00195                         stepName = step+"INPUT"
00196                         stepList.remove(step)
00197                         stepList.insert(stepIndex,stepName)
00198                 """    
00199                 name += stepName
00200 
00201                 if addCom and (not addTo or addTo[stepIndex]==1):
00202                     from Configuration.PyReleaseValidation.relval_steps import merge
00203                     copyStep=merge(addCom+[self.relvalModule.steps[stepName]])
00204                     cfg, input, opts = self.makeCmd(copyStep)
00205                 else:
00206                     cfg, input, opts = self.makeCmd(self.relvalModule.steps[stepName])
00207 
00208                 if input and cfg :
00209                     msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
00210                     raise MatrixException(msg)
00211 
00212                 if input:
00213                     cmd = input
00214                 else:
00215                     if cfg:
00216                         cmd  = 'cmsDriver.py '+cfg+' '+opts
00217                     else:
00218                         cmd  = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
00219                     if self.wm:
00220                         cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
00221                     if self.addCommand:
00222                         cmd +=' '+self.addCommand
00223                 commands.append(cmd)
00224                 ranStepList.append(stepName)
00225                 stepIndex+=1
00226                 
00227             self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)
00228         
00229         return
00230 
00231 
00232     def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
00233 
00234         if selected:
00235             selected=map(float,selected)
00236         for matrixFile in self.files:
00237 
00238             self.reset(what)
00239 
00240             if self.what != 'all' and self.what not in matrixFile:
00241                 print "ignoring non-requested file",matrixFile
00242                 continue
00243 
00244             try:
00245                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
00246             except Exception, e:
00247                 print "ERROR reading file:", matrixFile, str(e)
00248                 raise
00249 
00250             if not self.workFlowSteps: continue
00251 
00252             dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
00253             outFile = open(dataFileName,'w')
00254 
00255             print "found ", len(self.workFlowSteps.keys()), ' workflows for ', dataFileName
00256             ids = self.workFlowSteps.keys()
00257             ids.sort()
00258             indexAndSteps=[]
00259 
00260             writtenWF=0
00261             for key in ids:
00262                 if selected and not (key[0] in selected):
00263                     continue
00264                 #trick to skip the HImix IB test
00265                 if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
00266                 num, name, commands, stepList = self.workFlowSteps[key]
00267                 
00268                 wfName,stepNames= name.split('+',1)
00269                 
00270                 stepNames=stepNames.replace('+RECODFROMRAWRECO','')
00271                 stepNames=stepNames.replace('+SKIMCOSD','')
00272                 stepNames=stepNames.replace('+SKIMD','')
00273                 if 'HARVEST' in stepNames:
00274                     #find out automatically what to remove
00275                     exactb=stepNames.index('+HARVEST')
00276                     exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
00277                     stepNames=stepNames.replace(stepNames[exactb:exacte],'')
00278                 otherSteps = None
00279                 if '+' in stepNames:
00280                     step1,otherSteps = stepNames.split('+',1)
00281                 
00282                 line = str(num) + ' ++ '+ wfName 
00283                 if otherSteps and not step1Only:
00284                     line += ' ++ ' +otherSteps.replace('+',',')
00285                 else:
00286                     line += ' ++ none'
00287                 inputInfo=None
00288                 if not isinstance(commands[0],str):
00289                     inputInfo=commands[0]
00290                 if otherSteps:
00291                     for (i,c) in enumerate(otherSteps.split('+')):
00292                         #pad with set
00293                         for p in range(len(indexAndSteps),i+2):
00294                             indexAndSteps.append(set())
00295                         indexAndSteps[i+1].add((c,commands[i+1]))
00296 
00297                 if inputInfo :
00298                     #skip the samples from INPUT when step1Only is on
00299                     if step1Only: continue
00300                     line += ' ++ REALDATA: '+inputInfo.dataSet
00301                     if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
00302                     line += ', FILES: ' +str(inputInfo.files)
00303                     line += ', EVENTS: '+str(inputInfo.events)
00304                     if inputInfo.label!='':
00305                         line += ', LABEL: ' +inputInfo.label
00306                     line += ', LOCATION:'+inputInfo.location
00307                     line += ' @@@'
00308                 else:
00309                     line += ' @@@ '+commands[0]
00310                 line=line.replace('DQMROOT','DQM')
00311                 writtenWF+=1
00312                 outFile.write(line+'\n')
00313 
00314 
00315             outFile.write('\n'+'\n')
00316             if step1Only: continue
00317 
00318             for (index,s) in enumerate(indexAndSteps):
00319                 for (stepName,cmd) in s:
00320                     stepIndex=index+1
00321                     if 'dbsquery.log' in cmd: continue
00322                     line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
00323                     line=line.replace('DQMROOT','DQM')
00324                     outFile.write(line+'\n')
00325                 outFile.write('\n'+'\n')
00326             outFile.close()
00327             print "wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name
00328         return 
00329                     
00330 
00331     def showWorkFlows(self, selected=None, extended=True):
00332         if selected: selected = map(float,selected)
00333         maxLen = 100 # for summary, limit width of output
00334         fmt1   = "%-6s %-35s [1]: %s ..."
00335         fmt2   = "       %35s [%d]: %s ..."
00336         print "\nfound a total of ", len(self.workFlows), ' workflows:'
00337         if selected:
00338             print "      of which the following", len(selected), 'were selected:'
00339         #-ap for now:
00340         maxLen = -1  # for individual listing, no limit on width
00341         fmt1   = "%-6s %-35s [1]: %s " 
00342         fmt2   = "       %35s [%d]: %s"
00343 
00344         N=[]
00345         for wf in self.workFlows:
00346             if selected and float(wf.numId) not in selected: continue
00347             if extended: print ''
00348             #pad with zeros
00349             for i in range(len(N),len(wf.cmds)):                N.append(0)
00350             N[len(wf.cmds)-1]+=1
00351             wfName, stepNames = wf.nameId.split('+',1)
00352             for i,s in enumerate(wf.cmds):
00353                 if extended:
00354                     if i==0:
00355                         print fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen])
00356                     else:
00357                         print fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen])
00358                 else:
00359                     print "%-6s %-35s "% (wf.numId, stepNames)
00360                     break
00361         print ''
00362         for i,n in enumerate(N):
00363             if n:            print n,'workflows with',i+1,'steps'
00364 
00365         return
00366     
00367     def createWorkFlows(self, fileNameIn):
00368 
00369         prefixIn = self.filesPrefMap[fileNameIn]
00370 
00371         # get through the list of items and update the requested workflows only
00372         keyList = self.workFlowSteps.keys()
00373         ids = []
00374         for item in keyList:
00375             id, pref = item
00376             if pref != prefixIn : continue
00377             ids.append(id)
00378         ids.sort()
00379         for key in ids:
00380             val = self.workFlowSteps[(key,prefixIn)]
00381             num, name, commands, stepList = val
00382             nameId = str(num)+'_'+name
00383             if nameId in self.nameList:
00384                 print "==> duplicate name found for ", nameId
00385                 print '    keeping  : ', self.nameList[nameId]
00386                 print '    ignoring : ', val
00387             else:
00388                 self.nameList[nameId] = val
00389 
00390             self.workFlows.append(WorkFlow(num, name, commands=commands))
00391 
00392         return
00393 
00394     def prepare(self, useInput=None, refRel='', fromScratch=None):
00395         
00396         for matrixFile in self.files:
00397             if self.what != 'all' and self.what not in matrixFile:
00398                 print "ignoring non-requested file",matrixFile
00399                 continue
00400 
00401             try:
00402                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
00403             except Exception, e:
00404                 print "ERROR reading file:", matrixFile, str(e)
00405                 raise
00406             
00407             try:
00408                 self.createWorkFlows(matrixFile)
00409             except Exception, e:
00410                 print "ERROR creating workflows :", str(e)
00411                 raise
00412             
00413                 
00414     def show(self, selected=None, extended=True):    
00415 
00416         self.showWorkFlows(selected,extended)
00417         print '\n','-'*80,'\n'
00418 
00419 
00420     def updateDB(self):
00421 
00422         import pickle
00423         pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
00424 
00425         return
00426