CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_1/src/Utilities/ReleaseScripts/scripts/duplicateReflexLibrarySearch.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 
00003 import optparse
00004 import os
00005 import commands
00006 import re
00007 import sys
00008 import pprint
00009 import commands
00010 import subprocess
00011 from XML2Python import xml2obj
00012 
# Spelling normalisations for class names.  Not every entry is a real
# typedef; several only tidy the spacing around template brackets and
# commas so that the printed names are easier to read.
#
# Layout:   preferred spelling : [spellings that get replaced by it]
typedefsDict = {
    'unsigned int' : ['unsignedint', 'UInt32_t', 'uint32_t'],
    'unsigned long': ['unsignedlong'],
    'int'          : ['Int32_t'],
    'float'        : ['Float_t'],
    'double'       : ['Double_t'],
    'char'         : ['Char_t'],
    '< '           : ['<', '&lt;'],
    ' >'           : ['>', '&gt;'],
    ', '           : [','],
}
00028 
00029 
# Package name -> regular expressions of the class names that package
# is expected to hold (for example, package 'TrackReco' should have
# objects 'reco::Track').
# This is deliberately a list of one-entry dictionaries rather than a
# single dictionary: the entries are searched in the order given here,
# and a package name may appear more than once.
equivDict = [
    {'GsfTracking': ['reco::GsfTrack(Collection|).*(MomentumConstraint|VertexConstraint)',
                     'Trajectory.*reco::GsfTrack']},
    {'ParallelAnalysis': ['examples::TrackAnalysisAlgorithm']},
    {'PatCandidates': ['pat::PATObject', 'pat::Lepton']},
    {'BTauReco': ['reco::SoftLeptonProperties', 'reco::SecondaryVertexTagInfo']},
    {'CastorReco': ['reco::CastorJet']},
    {'JetMatching': ['reco::JetFlavour', 'reco::MatchedPartons']},
    {'TrackingAnalysis': ['TrackingParticle']},
    {'Egamma': ['reco::ElectronID']},
    {'TopObjects': ['reco::CATopJetProperties']},
    {'TauReco': ['reco::L2TauIsolationInfo', 'reco::RecoTauPiZero', 'reco::BaseTau']},
    {'ValidationFormats': ['PGlobalDigi::.+', 'PGlobalRecHit::.+']},
    {'TrajectorySeed': ['TrajectorySeed']},
    {'TrackCandidate': ['TrackCandidate']},
    {'PatternTools': ['MomentumConstraint', 'VertexConstraint', 'Trajectory']},
    {'TrackerRecHit2D': ['SiStrip(Matched|)RecHit[12]D', 'SiTrackerGSRecHit[12]D',
                         'SiPixelRecHit']},
    {'MuonReco': ['reco::Muon(Ref|)(Vector|)']},
    {'MuonSeed': ['L3MuonTrajectorySeed']},
    {'HepMCCandidate': ['reco::GenParticle.*']},
    {'L1Trigger': ['l1extra::L1.+Particle']},
    {'TrackInfo': ['reco::TrackingRecHitInfo']},
    {'EgammaCandidates': ['reco::GsfElectron.*', 'reco::Photon.*']},
    {'HcalIsolatedTrack': ['reco::IsolatedPixelTrackCandidate',
                           'reco::EcalIsolatedParticleCandidate']},
    {'HcalRecHit': ['HFRecHit', 'HORecHit', 'ZDCRecHit', 'HBHERecHit']},
    {'PFRootEvent': ['EventColin::']},
    {'CaloTowers': ['CaloTower.*']},
    {'GsfTrackReco': ['GsfTrack.*']},
    {'METReco': ['reco::(Calo|PF|Gen|)MET', 'reco::PFClusterMET']},
    {'ParticleFlowReco': ['reco::RecoPFClusterRefCandidateRef.*']},
    {'ParticleFlowCandidate': ['reco::PFCandidateRef', 'reco::PFCandidateFwdRef']},
    {'PhysicsToolsObjects': ['PhysicsTools::Calibration']},
    {'RecoCandidate': ['reco::Candidate']},
    {'TrackReco': ['reco::Track']},
    {'VertexReco': ['reco::Vertex']},
    {'TFWLiteSelectorTest': ['tfwliteselectortest']},
    {'PatCandidates': ['reco::RecoCandidate', 'pat::[A-Za-z]+Ref(Vector|)']},
    {'JetReco': ['reco::.*Jet', 'reco::.*Jet(Collection|Ref)']},
]
00072 
# Names that the EDM plugin dump searches never report.  Only the
# presence of a key is tested; the value 1 is a placeholder.
# NOTE(review): the empty-string key presumably absorbs blank lines in
# the output of the shell commands -- confirm.
ignoreEdmDP = {
    'LCGReflex/__gnu_cxx::__normal_iterator<std::basic_string<char>*,std::vector<std::basic_string<char>%>%>': 1,
    '': 1,
}
00077 
def getReleaseBaseDir ():
    """Return the base directory of the current CMSSW area.

    $CMSSW_RELEASE_BASE is preferred (release area); when it is unset
    or empty, $CMSSW_BASE (developer area) is used instead.

    Returns the directory as a string, or a false value (None or '')
    when neither variable supplies one.  Callers that concatenate the
    result with a path will therefore see a TypeError for None.
    """
    # 'or' covers both the unset (None) and the empty-string case;
    # calling len() on the unset value used to raise a TypeError
    # before the fallback could be tried.
    return os.environ.get ('CMSSW_RELEASE_BASE') or \
           os.environ.get ('CMSSW_BASE')
00085 
00086 
def searchClassDefXml (srcDir):
    """Scan the 'classes_def.xml' files below srcDir for Reflex
    definition problems.

    What is reported depends on the module-level 'options' object:
      options.showXMLs     -- pretty-print the list of XML files found
      options.lostDefs     -- report class names that match the name
                              (or an equivDict pattern) of a package
                              other than the one declaring them
      options.lazyLostDefs -- additionally ignore names that contain a
                              template-free class name declared in the
                              same XML file
      options.dups         -- report class names declared in more than
                              one XML file
      options.verbose      -- print progress information

    srcDir -- directory to search.  When empty, '<base>/src' is used,
              with <base> taken from getReleaseBaseDir().

    The current working directory is changed to srcDir.

    Raises RuntimeError when no base directory can be determined or
    when srcDir cannot be entered.  Calls sys.exit() when a package
    name does not match the expression built from itself.
    """
    # compile necessary RE statements
    spacesRE       = re.compile (r'\s+')
    stdRE          = re.compile (r'std::')
    srcClassNameRE = re.compile (r'(\w+)/src/classes_def.xml')
    ignoreSrcRE    = re.compile (r'.*/FWCore/Skeletons/scripts/mkTemplates/.+')
    braketRE       = re.compile (r'<.+>')
    # get the source directory we want
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            # no base directory: the concatenation was attempted on None
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    print ("Searching for 'classes_def.xml' in '%s'." % srcDir)
    xmlFiles = commands.getoutput ('find . -name "*classes_def.xml" -print').\
               split ('\n')
    # print out the XML files, if requested
    if options.showXMLs:
        pprint.pprint (xmlFiles)
    # try and figure out the names of the packages
    xmlPackages = []
    packagesREs = {}   # package name -> RE matching that name as a word
    equivREs    = {}   # package name -> [(RE, pattern text), ...]
    explicitREs = []   # (RE, package) for every equivDict pattern, in order
    for item in equivDict:
        for pack in item:
            for equiv in item[pack]:
                explicitREs.append ((re.compile (r'\b' + equiv + r'\b'), pack))
    if options.lostDefs:
        for filename in xmlFiles:
            if (not filename) or (ignoreSrcRE.match (filename)): continue
            match = srcClassNameRE.search (filename)
            if not match: continue
            packageName = match.group (1)
            xmlPackages.append (packageName)
            packagesREs[packageName] = re.compile (r'\b' + packageName + r'\b')
            equivList = equivREs.setdefault (packageName, [])
            for item in equivDict:
                for equiv in item.get (packageName, []):
                    equivList.append ((re.compile (r'\b' + equiv + r'\b'),
                                       equiv))
            # the package's own name always goes last in its list
            equivList.append ((packagesREs[packageName], packageName))
    classDict = {}     # normalised class name -> set of XML files
    ncdict = {'class' : 'className'}
    # Apply the longest replacement targets first so that, for
    # example, 'UInt32_t' becomes 'unsigned int' before the
    # 'Int32_t' -> 'int' rule gets a chance to match inside it.
    # Plain dictionary iteration order gives no such guarantee.
    typedefOrder = sorted (typedefsDict,
                           key = lambda target: (-len (target), target))
    for filename in xmlFiles:
        if (not filename) or (ignoreSrcRE.match (filename)): continue
        dupProblems     = ''
        exceptName      = ''
        regexList       = []
        localObjects    = []
        simpleObjectREs = []
        if options.lostDefs:
            lostMatch = srcClassNameRE.search (filename)
            if not lostMatch: continue
            exceptName = lostMatch.group (1)
            regexList = equivREs[exceptName]
            # sanity check: the last entry is the RE built from the
            # package name itself, so it has to match that name
            if not regexList[-1][0].search (exceptName):
                print ('%s not found in %s' % (exceptName, regexList[-1][0]))
                sys.exit()
        if options.verbose:
            print ("filename %s" % filename)
        try:
            xmlObj = xml2obj (filename = filename,
                              filtering = True,
                              nameChangeDict = ncdict)
        except Exception as detail:
            print ("File %s is malformed XML.  Please fix." % filename)
            print ("   %s" % (detail,))
            continue
        # The class entries sit either below a <selection> element or
        # directly below the top-level element.
        try:
            classList = xmlObj.selection.className
        except Exception:
            try:
                classList = xmlObj.className
            except Exception:
                # this isn't a real classes_def.xml file.  Skip it
                print ("**** SKIPPING '%s' - Doesn't seem to have proper information." % filename)
                continue
        for piece in classList:
            try:
                className = spacesRE.sub ('', piece.name)
            except Exception:
                # must be one of these class pattern things.  Skip it
                continue
            className = stdRE.sub ('', className)
            # Now get rid of any typedefs
            for typedef in typedefOrder:
                for alias in typedefsDict[typedef]:
                    className = re.sub (alias, typedef, className)
            classDict.setdefault (className, set()).add (filename)
            # should we check for lost definitions?
            if not options.lostDefs:
                continue
            localObjects.append (className)
            if options.lazyLostDefs and not braketRE.search (className):
                matchString = r'\b' + className + r'\b'
                simpleObjectREs.append ((re.compile (matchString), className))
        for className in localObjects:
            # if we see our name (or equivalent) here, then let's
            # skip complaining about this
            foundEquiv = False
            for equivRE in regexList:
                if equivRE[0].search (className):
                    foundEquiv = True
                    break
            # This loop runs even when a match was found above, so the
            # verbose message is still printed in that case.
            for simpleRE in simpleObjectREs:
                if simpleRE[0].search (className):
                    foundEquiv = True
                    if options.verbose and simpleRE[1] != className:
                        print ("    Using %s to ignore %s"
                               % (simpleRE[1], className))
                    break
            if foundEquiv: continue
            for exRes in explicitREs:
                if exRes[0].search (className):
                    dupProblems += "  %s : %s\n" % (exRes[1], className)
                    foundEquiv = True
                    break
            if foundEquiv: continue
            # The package's own name was already covered by regexList
            # above, so any hit here names a different package.
            for packageName in xmlPackages:
                if packagesREs[packageName].search (className):
                    dupProblems += "  %s : %s\n" % (packageName, className)
                    break
        # for className
        if dupProblems:
            print ('\n%s\n%s\n' % (filename, dupProblems))
    # for filename
    if options.dups:
        for name, fileSet in sorted (classDict.items()):
            if len (fileSet) < 2:
                continue
            print (name)
            for filename in sorted (fileSet):
                print ("   %s" % filename)
            print ("")
        # for name, fileSet
    # if options.dups
00245 
00246 
def searchDuplicatePlugins (edmpluginFile):
    """Report plugin names that edmpluginFile lists for more than one
    library.

    The first whitespace-separated column of each line in the file is
    taken as the library and the second as the plugin name.  For every
    name found with two or more distinct libraries (and not listed in
    ignoreEdmDP) the name is printed, followed by one '   **library**'
    line per library and a blank line.

    edmpluginFile -- path of the plugin cache file; it is inserted
                     into a shell command line as is.
    """
    # Unique (name, library) pairs -> occurrences per name -> names
    # occurring more than once.  The count is compared numerically; a
    # textual match on '2 ' would miss names found 3, 4, ... times.
    cmd = "cat %s | awk '{print $2\" \"$1}' | sort | uniq | awk '{print $1}' | sort | uniq -c | awk '$1+0 > 1 {print $2}'" % edmpluginFile
    output = commands.getoutput (cmd).split ('\n')
    for line in output:
        if line in ignoreEdmDP: continue
        # grep would treat a bare '*' as a repetition operator
        pattern = line.replace ("*", "\\*")
        cmd = "cat %s | grep ' %s ' | awk '{print $1}' | sort | uniq " % (edmpluginFile, pattern)
        out1 = commands.getoutput (cmd).split ('\n')
        # show the real name, not the grep-escaped form
        print (line)
        for plugin in out1:
            if plugin:
                print ("   **" + plugin + "**")
        print ("")
00262 
def searchEdmPluginDump (edmpluginFile, srcDir):
    """Search the EDM plugin cache file for duplicate Reflex
    definitions and print where each one comes from.

    edmpluginFile -- path of the cache file; when empty,
                     '<base>/lib/$SCRAM_ARCH/.edmplugincache' is used
    srcDir        -- source directory; when empty, '<base>/src' is used
    (<base> is taken from getReleaseBaseDir().)

    The working directory is changed to srcDir, searchDuplicatePlugins
    is run on the file, and one regular expression per entry of srcDir
    is appended to the module-level list packageREs.

    If options.searchFor is set, the second column of every line
    containing 'Reflex' is matched against its pipe-separated
    expressions (whitespace in an expression matches anything);
    otherwise names occurring on consecutive sorted lines are taken,
    except those listed in ignoreEdmDP.  For each resulting name the
    name is printed, followed by the getXmlName() result for every
    line of the file that matches it.

    Raises RuntimeError when a required environment variable is
    missing or srcDir cannot be entered.
    """
    global packageREs
    if not edmpluginFile:
        try:
            edmpluginFile = getReleaseBaseDir() + '/lib/' + \
                            os.environ.get ('SCRAM_ARCH') + '/.edmplugincache'
        except TypeError:
            # one of the two values is missing (None)
            raise RuntimeError ("$CMSSW_RELEASE_BASE or $SCRAM_ARCH not found.")
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    searchDuplicatePlugins (edmpluginFile)
    # One RE per directory entry: group 1 is the entry name, group 2
    # whatever follows it.  getXmlName() uses these to guess paths.
    packageNames = commands.getoutput ('ls -1').split ('\n')
    for package in packageNames:
        packageREs.append (re.compile (r'^(' + package + r')(\S+)$'))
    # set up the optional user-supplied search expressions
    searchREs = []
    doSearch = False
    if options.searchFor:
        fixSpacesRE = re.compile (r'\s+')
        doSearch = True
        for word in options.searchFor.split ('|'):
            searchREs.append (re.compile (fixSpacesRE.sub (r'.*', word)))
    # collect the names to report
    prevLine = ''
    problemSet = set()
    cmd = "grep Reflex %s | awk '{print $2}' | sort" % edmpluginFile
    for line in commands.getoutput (cmd).split ('\n'):
        if doSearch:
            for regex in searchREs:
                if regex.search (line):
                    problemSet.add (line)
                    break
        else:
            # the input is sorted, so duplicates are adjacent
            if line == prevLine:
                if line not in ignoreEdmDP:
                    problemSet.add (line)
            prevLine = line
    # Look up in which libraries the problems are found
    pluginCapRE = re.compile (r'plugin(\S+?)Capabilities.so')
    lcgReflexRE = re.compile (r'^LCGReflex/')
    percentRE   = re.compile (r'%')
    for problem in sorted (problemSet):
        # Unbackwhacked stars will mess with the grep command.  So
        # let's fix them now and not worry about it
        fixedProblem = problem.replace ('*', '\\*')
        cmd = 'grep "%s" %s | awk \'{print $1}\'' % (fixedProblem,
                                                     edmpluginFile)
        output = commands.getoutput (cmd).split ('\n')
        problem = lcgReflexRE.sub (r'', problem)
        problem = percentRE.sub   (r' ', problem)
        print (problem)
        for line in output:
            if not line:
                # grep found nothing; an empty name must not reach
                # the shell commands built by getXmlName
                continue
            match = pluginCapRE.match (line)
            if match:
                line = match.group (1)
            print ("   %s" % getXmlName (line))
        print ("")
00338 
def getXmlName (line):
    """Given a line from EDM plugin dump, try to get XML file name.

    line -- library or plugin name taken from the plugin dump

    The lookup goes through these steps and stops at the first hit:
      1. a result remembered in packageMatchDict,
      2. './<group 1>/<group 2>/src/classes_def.xml' for each
         expression in packageREs that matches, if that file exists,
      3. a single 'classes_def.xml' path below the current directory
         whose name matches line,
      4. a single 'BuildFile' below the current directory that
         mentions line; ' (<line>)' is appended to that path.
    Successful results are stored in packageMatchDict.

    Returns the path found, or '**<line>**' when nothing matched.
    """
    global packageMatchDict
    if not line:
        # With no name, the grep calls below would be run without a
        # pattern (and could wait on standard input), so give up now.
        return "****"
    retval = packageMatchDict.get (line)
    if retval:
        return retval
    for regex in packageREs:
        match = regex.search (line)
        if match:
            xmlFile = "./%s/%s/src/classes_def.xml" % \
                      (match.group (1), match.group (2))
            if os.path.exists (xmlFile):
                packageMatchDict [line] = xmlFile
                return xmlFile
    # If we're here, then we haven't been successful yet.  Let's try
    # the brute force approach.
    # Try 1
    cmd = 'find . -name classes_def.xml -print | grep %s' % line
    output = commands.getoutput (cmd).split ('\n')
    # only an unambiguous (single, non-empty) answer is accepted
    if len (output) == 1 and output[0]:
        retval = output[0]
        packageMatchDict [line] = retval
        return retval
    # Try 2
    cmd = 'find . -name "BuildFile" -exec grep -q %s {} \\; -print' % line
    output = commands.getoutput (cmd).split ('\n')
    if len (output) == 1 and output[0]:
        retval = output[0] + ' (%s)' % line
        packageMatchDict [line] = retval
        return retval
    return "**" + line + "**"
00374     
00375 
00376 
# Module-level state shared by searchEdmPluginDump and getXmlName:
#   packageREs       -- compiled expressions, appended to by
#                       searchEdmPluginDump and scanned by getXmlName
#   packageMatchDict -- results remembered by getXmlName, keyed by
#                       the name that was looked up
packageREs = list ()
packageMatchDict = dict ()
00379 
if __name__ == "__main__":
    # Command-line interface.  The parsed 'options' object is left at
    # module level because the search functions read it directly.
    parser = optparse.OptionParser ("Usage: %prog [options]\n"
                                    "Searches classes_def.xml for duplicate "
                                    "definitions")
    xmlGroup  = optparse.OptionGroup (parser, "ClassDef XML options")
    dumpGroup = optparse.OptionGroup (parser, "EdmPluginDump options")

    # keyword arguments shared by every on/off switch
    switch = dict (action='store_true', default=False)

    # options controlling the scan of the classes_def.xml files
    xmlGroup.add_option ('--dups', dest='dups',
                         help="Search for duplicate definitions",
                         **switch)
    xmlGroup.add_option ('--lostDefs', dest='lostDefs',
                         help="Looks for definitions in the wrong libraries",
                         **switch)
    xmlGroup.add_option ('--lazyLostDefs', dest='lazyLostDefs',
                         help="Will try to ignore as many lost defs as reasonable",
                         **switch)
    xmlGroup.add_option ('--verbose', dest='verbose',
                         help="Prints out a lot of information",
                         **switch)
    xmlGroup.add_option ('--showXMLs', dest='showXMLs',
                         help="Shows all 'classes_def.xml' files",
                         **switch)
    xmlGroup.add_option ('--dir', dest='srcdir', type='string', default='',
                         help="directory to search for 'classes_def.xml'"
                         " files (default: $CMSSW_RELEASE_BASE/src)")

    # options controlling the scan of the EDM plugin dump
    dumpGroup.add_option ('--edmPD', dest='edmPD',
                          help="Searches EDM Plugin Dump for duplicates",
                          **switch)
    dumpGroup.add_option ('--edmFile', dest='edmFile', type='string',
                          default='',
                          help="EDM Plugin Dump cache file'"
                          " (default: $CMSSW_RELEASE_BASE/lib/"
                          "$SCRAM_ARCH/.edmplugincache)")
    dumpGroup.add_option ('--searchFor', dest='searchFor', type='string',
                          default='',
                          help="Search EPD for given pipe-separated (|) regexs"
                          " instead of duplicates")

    parser.add_option_group (xmlGroup)
    parser.add_option_group (dumpGroup)
    (options, args) = parser.parse_args()

    # --lazyLostDefs is a relaxed form of --lostDefs, so it turns the
    # latter on as well
    if options.lazyLostDefs:
        options.lostDefs = True
    # any XML-related request triggers the classes_def.xml scan
    wantXmlScan = options.showXMLs or options.lostDefs or options.dups
    if wantXmlScan:
        searchClassDefXml (options.srcdir)
    if options.edmPD:
        searchEdmPluginDump (options.edmFile, options.srcdir)