CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_9_patch3/src/Configuration/PyReleaseValidation/python/MatrixReader.py

Go to the documentation of this file.
00001 
00002 import sys
00003 
00004 from Configuration.PyReleaseValidation.WorkFlow import WorkFlow
00005 
00006 # ================================================================================
00007 
class MatrixException(Exception):
    """Error raised when a workflow matrix definition is inconsistent.

    The message stays available as ``msg`` and is also handed to the
    ``Exception`` base class, so ``args``, ``repr()`` and pickling carry it.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        # __str__ must return a string; str() keeps this safe for any msg type
        return str(self.msg)
00013         
00014 # ================================================================================
00015 
00016 class MatrixReader(object):
00017 
00018     def __init__(self, opt):
00019 
00020         self.reset(opt.what)
00021 
00022         self.wm=opt.wmcontrol
00023         self.addCommand=opt.command
00024         self.commandLineWf=opt.workflow
00025         self.overWrite=opt.overWrite
00026         
00027         return
00028 
00029     def reset(self, what='all'):
00030 
00031         self.what = what
00032 
00033         #a bunch of information, but not yet the WorkFlow object
00034         self.workFlowSteps = {}
00035         #the actual WorkFlow objects
00036         self.workFlows = []
00037         self.nameList  = {}
00038         
00039         self.filesPrefMap = {'relval_standard' : 'std-' ,
00040                              'relval_highstats': 'hi-'  ,
00041                              'relval_pileup': 'PU-'  ,
00042                              'relval_generator': 'gen-'  ,
00043                              'relval_production': 'prod-'  ,
00044                              'relval_ged': 'ged-',
00045                              'relval_identity':'id-'
00046                              }
00047 
00048         self.files = ['relval_standard' ,
00049                       'relval_highstats',
00050                       'relval_pileup',
00051                       'relval_generator',
00052                       'relval_production',
00053                       'relval_ged',
00054                       'relval_identity'
00055                       ]
00056 
00057         self.relvalModule = None
00058         
00059         return
00060 
00061     def makeCmd(self, step):
00062 
00063         cmd = ''
00064         cfg = None
00065         input = None
00066         for k,v in step.items():
00067             if 'no_exec' in k : continue  # we want to really run it ...
00068             if k.lower() == 'cfg':
00069                 cfg = v
00070                 continue # do not append to cmd, return separately
00071             if k.lower() == 'input':
00072                 input = v 
00073                 continue # do not append to cmd, return separately
00074             
00075             #chain the configs
00076             #if k.lower() == '--python':
00077             #    v = 'step%d_%s'%(index,v)
00078             cmd += ' ' + k + ' ' + str(v)
00079         return cfg, input, cmd
00080     
00081     def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
00082         
00083         prefix = self.filesPrefMap[fileNameIn]
00084         
00085         print "processing ", fileNameIn
00086         
00087         try:
00088             _tmpMod = __import__( 'Configuration.PyReleaseValidation.'+fileNameIn )
00089             self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
00090         except Exception, e:
00091             print "ERROR importing file ", fileNameIn, str(e)
00092             return
00093 
00094         print "request for INPUT for ", useInput
00095 
00096         
00097         fromInput={}
00098         
00099         if useInput:
00100             for i in useInput:
00101                 if ':' in i:
00102                     (ik,il)=i.split(':')
00103                     if ik=='all':
00104                         for k in self.relvalModule.workflows.keys():
00105                             fromInput[float(k)]=int(il)
00106                     else:
00107                         fromInput[float(ik)]=int(il)
00108                 else:
00109                     if i=='all':
00110                         for k in self.relvalModule.workflows.keys():
00111                             fromInput[float(k)]=0
00112                     else:
00113                         fromInput[float(i)]=0
00114                 
00115         if fromScratch:
00116             fromScratch=map(float,fromScratch)
00117             for num in fromScratch:
00118                 if num in fromInput:
00119                     fromInput.pop(num)
00120         #overwrite steps
00121         if self.overWrite:
00122             for p in self.overWrite:
00123                 self.relvalModule.steps.overwrite(p)
00124         
00125         #change the origin of dataset on the fly
00126         if refRel:
00127             if ',' in refRel:
00128                 refRels=refRel.split(',')
00129                 if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
00130                     return
00131                 self.relvalModule.changeRefRelease(
00132                     self.relvalModule.steps,
00133                     zip(self.relvalModule.baseDataSetRelease,refRels)
00134                     )
00135             else:
00136                 self.relvalModule.changeRefRelease(
00137                     self.relvalModule.steps,
00138                     [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
00139                     )
00140             
00141 
00142         for num, wfInfo in self.relvalModule.workflows.items():
00143             commands=[]
00144             wfName = wfInfo[0]
00145             stepList = wfInfo[1]
00146             # if no explicit name given for the workflow, use the name of step1
00147             if wfName.strip() == '': wfName = stepList[0]
00148             # option to specialize the wf as the third item in the WF list
00149             addTo=None
00150             addCom=None
00151             if len(wfInfo)>=3:
00152                 addCom=wfInfo[2]
00153                 if not type(addCom)==list:   addCom=[addCom]
00154                 #print 'added dict',addCom
00155                 if len(wfInfo)>=4:
00156                     addTo=wfInfo[3]
00157                     #pad with 0
00158                     while len(addTo)!=len(stepList):
00159                         addTo.append(0)
00160 
00161             name=wfName
00162             stepIndex=0
00163             ranStepList=[]
00164 
00165             #first resolve INPUT possibilities
00166             if num in fromInput:
00167                 ilevel=fromInput[num]
00168                 #print num,ilevel
00169                 for (stepIr,step) in enumerate(reversed(stepList)):
00170                     stepName=step
00171                     stepI=(len(stepList)-stepIr)-1
00172                     #print stepIr,step,stepI,ilevel                    
00173                     if stepI>ilevel:
00174                         #print "ignoring"
00175                         continue
00176                     if stepI!=0:
00177                         testName='__'.join(stepList[0:stepI+1])+'INPUT'
00178                     else:
00179                         testName=step+'INPUT'
00180                     #print "JR",stepI,stepIr,testName,stepList
00181                     if testName in self.relvalModule.steps.keys():
00182                         #print "JR",stepI,stepIr
00183                         stepList[stepI]=testName
00184                         #pop the rest in the list
00185                         #print "\tmod prepop",stepList
00186                         for p in range(stepI):
00187                             stepList.pop(0)
00188                         #print "\t\tmod",stepList
00189                         break
00190                                                         
00191                                                     
00192             for (stepI,step) in enumerate(stepList):
00193                 stepName=step
00194                 if self.wm:
00195                     #cannot put a certain number of things in wm
00196                     if stepName in ['HARVEST','HARVESTD','HARVESTDreHLT','RECODFROMRAWRECO','SKIMD','SKIMCOSD','SKIMDreHLT']:
00197                         continue
00198                     
00199                 #replace stepName is needed
00200                 #if stepName in self.replaceStep
00201                 if len(name) > 0 : name += '+'
00202                 #any step can be mirrored with INPUT
00203                 ## maybe we want too level deep input
00204                 """
00205                 if num in fromInput:
00206                     if step+'INPUT' in self.relvalModule.steps.keys():
00207                         stepName = step+"INPUT"
00208                         stepList.remove(step)
00209                         stepList.insert(stepIndex,stepName)
00210                 """    
00211                 name += stepName
00212 
00213                 if addCom and (not addTo or addTo[stepIndex]==1):
00214                     from Configuration.PyReleaseValidation.relval_steps import merge
00215                     copyStep=merge(addCom+[self.relvalModule.steps[stepName]])
00216                     cfg, input, opts = self.makeCmd(copyStep)
00217                 else:
00218                     cfg, input, opts = self.makeCmd(self.relvalModule.steps[stepName])
00219 
00220                 if input and cfg :
00221                     msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
00222                     raise MatrixException(msg)
00223 
00224                 if input:
00225                     cmd = input
00226                 else:
00227                     if cfg:
00228                         cmd  = 'cmsDriver.py '+cfg+' '+opts
00229                     else:
00230                         cmd  = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
00231                     if self.wm:
00232                         cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
00233                     if self.addCommand:
00234                         cmd +=' '+self.addCommand
00235                     if self.wm:
00236                         cmd=cmd.replace('DQMROOT','DQM')
00237                 commands.append(cmd)
00238                 ranStepList.append(stepName)
00239                 stepIndex+=1
00240                 
00241             self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)
00242         
00243         return
00244 
00245 
00246     def showRaw(self, useInput, refRel=None, fromScratch=None, what='all',step1Only=False,selected=None):
00247 
00248         if selected:
00249             selected=map(float,selected)
00250         for matrixFile in self.files:
00251 
00252             self.reset(what)
00253 
00254             if self.what != 'all' and self.what not in matrixFile:
00255                 print "ignoring non-requested file",matrixFile
00256                 continue
00257 
00258             try:
00259                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
00260             except Exception, e:
00261                 print "ERROR reading file:", matrixFile, str(e)
00262                 raise
00263 
00264             if not self.workFlowSteps: continue
00265 
00266             dataFileName = matrixFile.replace('relval_', 'cmsDriver_')+'_hlt.txt'
00267             outFile = open(dataFileName,'w')
00268 
00269             print "found ", len(self.workFlowSteps.keys()), ' workflows for ', dataFileName
00270             ids = self.workFlowSteps.keys()
00271             ids.sort()
00272             indexAndSteps=[]
00273 
00274             writtenWF=0
00275             for key in ids:
00276                 if selected and not (key[0] in selected):
00277                     continue
00278                 #trick to skip the HImix IB test
00279                 if key[0]==203.1 or key[0]==204.1 or key[0]==205.1 or key[0]==4.51 or key[0]==4.52: continue
00280                 num, name, commands, stepList = self.workFlowSteps[key]
00281                 
00282                 wfName,stepNames= name.split('+',1)
00283                 
00284                 stepNames=stepNames.replace('+RECODFROMRAWRECO','')
00285                 stepNames=stepNames.replace('+SKIMCOSD','')
00286                 stepNames=stepNames.replace('+SKIMD','')
00287                 if 'HARVEST' in stepNames:
00288                     #find out automatically what to remove
00289                     exactb=stepNames.index('+HARVEST')
00290                     exacte=stepNames.index('+',exactb+1) if ('+' in stepNames[exactb+1:]) else (len(stepNames))
00291                     stepNames=stepNames.replace(stepNames[exactb:exacte],'')
00292                 otherSteps = None
00293                 if '+' in stepNames:
00294                     step1,otherSteps = stepNames.split('+',1)
00295                 
00296                 line = str(num) + ' ++ '+ wfName 
00297                 if otherSteps and not step1Only:
00298                     line += ' ++ ' +otherSteps.replace('+',',')
00299                 else:
00300                     line += ' ++ none'
00301                 inputInfo=None
00302                 if not isinstance(commands[0],str):
00303                     inputInfo=commands[0]
00304                 if otherSteps:
00305                     for (i,c) in enumerate(otherSteps.split('+')):
00306                         #pad with set
00307                         for p in range(len(indexAndSteps),i+2):
00308                             indexAndSteps.append(set())
00309                         indexAndSteps[i+1].add((c,commands[i+1]))
00310 
00311                 if inputInfo :
00312                     #skip the samples from INPUT when step1Only is on
00313                     if step1Only: continue
00314                     line += ' ++ REALDATA: '+inputInfo.dataSet
00315                     if inputInfo.run!=[]: line += ', RUN:'+'|'.join(map(str,inputInfo.run))
00316                     line += ', FILES: ' +str(inputInfo.files)
00317                     line += ', EVENTS: '+str(inputInfo.events)
00318                     if inputInfo.label!='':
00319                         line += ', LABEL: ' +inputInfo.label
00320                     line += ', LOCATION:'+inputInfo.location
00321                     line += ' @@@'
00322                 else:
00323                     line += ' @@@ '+commands[0]
00324                 line=line.replace('DQMROOT','DQM')
00325                 writtenWF+=1
00326                 outFile.write(line+'\n')
00327 
00328 
00329             outFile.write('\n'+'\n')
00330             if step1Only: continue
00331 
00332             for (index,s) in enumerate(indexAndSteps):
00333                 for (stepName,cmd) in s:
00334                     stepIndex=index+1
00335                     if 'dbsquery.log' in cmd: continue
00336                     line = 'STEP%d ++ '%(stepIndex,) +stepName + ' @@@ '+cmd
00337                     line=line.replace('DQMROOT','DQM')
00338                     outFile.write(line+'\n')
00339                 outFile.write('\n'+'\n')
00340             outFile.close()
00341             print "wrote ",writtenWF, ' workflow'+('s' if (writtenWF!=1) else ''),' to ', outFile.name
00342         return 
00343                     
00344 
00345     def showWorkFlows(self, selected=None, extended=True):
00346         if selected: selected = map(float,selected)
00347         maxLen = 100 # for summary, limit width of output
00348         fmt1   = "%-6s %-35s [1]: %s ..."
00349         fmt2   = "       %35s [%d]: %s ..."
00350         print "\nfound a total of ", len(self.workFlows), ' workflows:'
00351         if selected:
00352             print "      of which the following", len(selected), 'were selected:'
00353         #-ap for now:
00354         maxLen = -1  # for individual listing, no limit on width
00355         fmt1   = "%-6s %-35s [1]: %s " 
00356         fmt2   = "       %35s [%d]: %s"
00357 
00358         N=[]
00359         for wf in self.workFlows:
00360             if selected and float(wf.numId) not in selected: continue
00361             if extended: print ''
00362             #pad with zeros
00363             for i in range(len(N),len(wf.cmds)):                N.append(0)
00364             N[len(wf.cmds)-1]+=1
00365             wfName, stepNames = wf.nameId.split('+',1)
00366             for i,s in enumerate(wf.cmds):
00367                 if extended:
00368                     if i==0:
00369                         print fmt1 % (wf.numId, stepNames, (str(s)+' ')[:maxLen])
00370                     else:
00371                         print fmt2 % ( ' ', i+1, (str(s)+' ')[:maxLen])
00372                 else:
00373                     print "%-6s %-35s "% (wf.numId, stepNames)
00374                     break
00375         print ''
00376         for i,n in enumerate(N):
00377             if n:            print n,'workflows with',i+1,'steps'
00378 
00379         return
00380     
00381     def createWorkFlows(self, fileNameIn):
00382 
00383         prefixIn = self.filesPrefMap[fileNameIn]
00384 
00385         # get through the list of items and update the requested workflows only
00386         keyList = self.workFlowSteps.keys()
00387         ids = []
00388         for item in keyList:
00389             id, pref = item
00390             if pref != prefixIn : continue
00391             ids.append(id)
00392         ids.sort()
00393         for key in ids:
00394             val = self.workFlowSteps[(key,prefixIn)]
00395             num, name, commands, stepList = val
00396             nameId = str(num)+'_'+name
00397             if nameId in self.nameList:
00398                 print "==> duplicate name found for ", nameId
00399                 print '    keeping  : ', self.nameList[nameId]
00400                 print '    ignoring : ', val
00401             else:
00402                 self.nameList[nameId] = val
00403 
00404             self.workFlows.append(WorkFlow(num, name, commands=commands))
00405 
00406         return
00407 
00408     def prepare(self, useInput=None, refRel='', fromScratch=None):
00409         
00410         for matrixFile in self.files:
00411             if self.what != 'all' and self.what not in matrixFile:
00412                 print "ignoring non-requested file",matrixFile
00413                 continue
00414 
00415             try:
00416                 self.readMatrix(matrixFile, useInput, refRel, fromScratch)
00417             except Exception, e:
00418                 print "ERROR reading file:", matrixFile, str(e)
00419                 raise
00420             
00421             try:
00422                 self.createWorkFlows(matrixFile)
00423             except Exception, e:
00424                 print "ERROR creating workflows :", str(e)
00425                 raise
00426             
00427                 
00428     def show(self, selected=None, extended=True):    
00429 
00430         self.showWorkFlows(selected,extended)
00431         print '\n','-'*80,'\n'
00432 
00433 
00434     def updateDB(self):
00435 
00436         import pickle
00437         pickle.dump(self.workFlows, open('theMatrix.pkl', 'w') )
00438 
00439         return
00440