CMS 3D CMS Logo

/data/doxygen/doxygen-1.7.3/gen/CMSSW_4_2_8/src/Utilities/ReleaseScripts/scripts/duplicateReflexLibrarySearch.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 
00003 import optparse
00004 import os
00005 import commands
00006 import re
00007 import sys
00008 import pprint
00009 import commands
00010 import subprocess
00011 from XML2Python import xml2obj
00012 
# Spelling substitutions applied to class names so that equivalent
# types compare equal.  Not every entry is a real typedef, but the
# result is easier to read.
# Key: the text we want.  Value: the spellings it replaces.
typedefsDict = {
    'unsigned int': ['unsignedint', 'UInt32_t', 'uint32_t'],
    'unsigned long': ['unsignedlong'],
    'int': ['Int32_t'],
    'float': ['Float_t'],
    'double': ['Double_t'],
    'char': ['Char_t'],
    '< ': ['<', '&lt;'],
    ' >': ['>', '&gt;'],
    ', ': [','],
}
00028 
00029 
# Equivalent names for packages: tells the script that, for example,
# the 'TrackReco' package is expected to hold 'reco::Track' objects.
# Each single-entry dictionary maps a package name to regular
# expressions for class names belonging to it.  This is an ordered
# list: it is searched front to back and the first match wins.
equivDict = [
    {'GsfTracking': [
        'reco::GsfTrack(Collection|).*(MomentumConstraint|VertexConstraint)',
        'Trajectory.*reco::GsfTrack',
    ]},
    {'ParallelAnalysis': ['examples::TrackAnalysisAlgorithm']},
    {'PatCandidates': ['pat::PATObject', 'pat::Lepton']},
    {'BTauReco': [
        'reco::SoftLeptonProperties',
        'reco::SecondaryVertexTagInfo',
    ]},
    {'CastorReco': ['reco::CastorJet']},
    {'JetMatching': ['reco::JetFlavour', 'reco::MatchedPartons']},
    {'TrackingAnalysis': ['TrackingParticle']},
    {'Egamma': ['reco::ElectronID']},
    {'TopObjects': ['reco::CATopJetProperties']},
    {'TauReco': [
        'reco::L2TauIsolationInfo',
        'reco::RecoTauPiZero',
        'reco::BaseTau',
    ]},
    {'ValidationFormats': ['PGlobalDigi::.+', 'PGlobalRecHit::.+']},
    {'TrajectorySeed': ['TrajectorySeed']},
    {'TrackCandidate': ['TrackCandidate']},
    {'PatternTools': [
        'MomentumConstraint',
        'VertexConstraint',
        'Trajectory',
    ]},
    {'TrackerRecHit2D': [
        'SiStrip(Matched|)RecHit[12]D',
        'SiTrackerGSRecHit[12]D',
        'SiPixelRecHit',
    ]},
    {'MuonReco': ['reco::Muon(Ref|)(Vector|)']},
    {'MuonSeed': ['L3MuonTrajectorySeed']},
    {'HepMCCandidate': ['reco::GenParticle.*']},
    {'L1Trigger': ['l1extra::L1.+Particle']},
    {'TrackInfo': ['reco::TrackingRecHitInfo']},
    {'EgammaCandidates': ['reco::GsfElectron.*', 'reco::Photon.*']},
    {'HcalIsolatedTrack': [
        'reco::IsolatedPixelTrackCandidate',
        'reco::EcalIsolatedParticleCandidate',
    ]},
    {'HcalRecHit': ['HFRecHit', 'HORecHit', 'ZDCRecHit', 'HBHERecHit']},
    {'PFRootEvent': ['EventColin::']},
    {'CaloTowers': ['CaloTower.*']},
    {'GsfTrackReco': ['GsfTrack.*']},
    {'METReco': ['reco::(Calo|PF|Gen|)MET', 'reco::PFClusterMET']},
    {'ParticleFlowCandidate': [
        'reco::PFCandidateRef',
        'reco::PFCandidateFwdRef',
    ]},
    {'PhysicsToolsObjects': ['PhysicsTools::Calibration']},
    {'RecoCandidate': ['reco::Candidate']},
    {'TrackReco': ['reco::Track']},
    {'VertexReco': ['reco::Vertex']},
    {'TFWLiteSelectorTest': ['tfwliteselectortest']},
    {'PatCandidates': [
        'reco::RecoCandidate',
        'pat::[A-Za-z]+Ref(Vector|)',
    ]},
    {'JetReco': ['reco::.*Jet', 'reco::.*Jet(Collection|Ref)']},
]
00071 
# Names that are never reported as duplicates when scanning the EDM
# plugin cache.  The empty-string key makes blank lines in the scanned
# output get skipped as well.
ignoreEdmDP = {
    ('LCGReflex/__gnu_cxx::__normal_iterator<std::basic_string<char>*,'
     'std::vector<std::basic_string<char>%>%>'): 1,
    '': 1,
}
00076 
def getReleaseBaseDir ():
    """ Return the base directory of the current CMSSW area.

    $CMSSW_RELEASE_BASE is used when it is set and non-empty;
    otherwise the value of $CMSSW_BASE is returned.  The result is
    None when neither variable is set.
    """
    # os.environ.get() returns None for an unset variable, so test the
    # value for truthiness instead of calling len() on it (len(None)
    # raises TypeError and the fallback would never be reached).
    return os.environ.get('CMSSW_RELEASE_BASE') or \
           os.environ.get('CMSSW_BASE')
00084 
00085 
def searchClassDefXml (srcDir):
    """ Searches through the requested directory looking at
    'classes_def.xml' files for duplicate or misplaced Reflex
    definitions.

    srcDir is the directory to search; when it is empty,
    '<release base>/src' (see getReleaseBaseDir) is used.  The current
    working directory is changed to srcDir.  What is checked and
    printed is controlled by the module-level 'options' object
    (showXMLs, lostDefs, lazyLostDefs, verbose, dups); all results go
    to stdout.

    Raises RuntimeError when the release base cannot be determined or
    when srcDir is not a valid directory.
    """
    # compile necessary RE statements
    spacesRE       = re.compile (r'\s+')
    stdRE          = re.compile (r'std::')
    srcClassNameRE = re.compile (r'(\w+)/src/classes_def.xml')
    ignoreSrcRE    = re.compile (r'.*/FWCore/Skeletons/scripts/mkTemplates/.+')
    braketRE       = re.compile (r'<.+>')
    # get the source directory we want
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            # no usable base directory in the environment
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    print("Searching for 'classes_def.xml' in '%s'." % srcDir)
    findCmd  = 'find . -name "*classes_def.xml" -print'
    xmlFiles = commands.getoutput (findCmd).split ('\n')
    # print out the XML files, if requested
    if options.showXMLs:
        pprint.pprint (xmlFiles)
    # (regex, package) pairs for every explicitly listed equivalent,
    # kept in the order given by equivDict
    explicitREs = [ (re.compile (r'\b' + equiv + r'\b'), pack)
                    for item in equivDict
                    for pack in item
                    for equiv in item[pack] ]
    # try and figure out the names of the packages
    xmlPackages = []
    packagesREs = {}
    equivREs    = {}
    if options.lostDefs:
        for filename in xmlFiles:
            if (not filename) or ignoreSrcRE.match (filename):
                continue
            match = srcClassNameRE.search (filename)
            if not match:
                continue
            packageName = match.group (1)
            xmlPackages.append (packageName)
            packagesREs[packageName] = \
                re.compile (r'\b' + packageName + r'\b')
            equivList = equivREs.setdefault (packageName, [])
            for item in equivDict:
                for equiv in item.get (packageName, []):
                    equivList.append( (re.compile (r'\b' + equiv + r'\b'),
                                       equiv) )
            # the package's own name always goes last
            equivList.append( (packagesREs[packageName], packageName) )
    classDict = {}
    ncdict = {'class' : 'className'}
    for filename in xmlFiles:
        if (not filename) or ignoreSrcRE.match (filename):
            continue
        dupProblems     = ''
        regexList       = []
        localObjects    = []
        simpleObjectREs = []
        if options.lostDefs:
            lostMatch = srcClassNameRE.search (filename)
            if not lostMatch:
                continue
            exceptName = lostMatch.group (1)
            regexList  = equivREs[exceptName]
            # sanity check: the last entry is the package's own name
            # and must therefore match it
            ownRE = regexList[-1][0]
            if not ownRE.search (exceptName):
                print("%s not found in %s" % (exceptName, ownRE))
                sys.exit()
        if options.verbose:
            print("filename %s" % filename)
        try:
            xmlObj = xml2obj (filename = filename,
                              filtering = True,
                              nameChangeDict = ncdict)
        except Exception as detail:
            print("File %s is malformed XML.  Please fix." % filename)
            print("   %s" % (detail,))
            continue
        # The class list is either under <selection> or at top level.
        # 'except Exception' keeps the best-effort behaviour without
        # also trapping SystemExit or KeyboardInterrupt.
        try:
            classList = xmlObj.selection.className
        except Exception:
            try:
                classList = xmlObj.className
            except Exception:
                # this isn't a real classes_def.xml file.  Skip it
                print("**** SKIPPING '%s' - Doesn't seem to have proper information." % filename)
                continue
        for piece in classList:
            try:
                className = spacesRE.sub ('', piece.name)
            except Exception:
                # must be one of these class pattern things.  Skip it
                continue
            className = stdRE.sub ('', className)
            # Now get rid of any typedefs
            for typedef, tdList in typedefsDict.items():
                for alias in tdList:
                    className = re.sub (alias, typedef, className)
            classDict.setdefault (className, set()).add (filename)
            # should we check for lost definitions?
            if not options.lostDefs:
                continue
            localObjects.append (className)
            if options.lazyLostDefs and not braketRE.search (className):
                simpleObjectREs.append( (re.compile (r'\b' + className + r'\b'),
                                         className) )
        for className in localObjects:
            # if we see our name (or equivalent) here, then let's
            # skip complaining about this
            foundEquiv = False
            for regex, _ in regexList:
                if regex.search (className):
                    foundEquiv = True
                    break
            for regex, simpleName in simpleObjectREs:
                if regex.search (className):
                    foundEquiv = True
                    if options.verbose and simpleName != className:
                        print("    Using %s to ignore %s" \
                              % (simpleName, className))
                    break
            if foundEquiv:
                continue
            # does this class explicitly belong to some other package?
            for regex, pack in explicitREs:
                if regex.search (className):
                    dupProblems += "  %s : %s\n" % (pack, className)
                    foundEquiv = True
                    break
            if foundEquiv:
                continue
            # otherwise, does it mention the name of another package?
            for packageName in xmlPackages:
                if packagesREs[packageName].search (className):
                    dupProblems += "  %s : %s\n" % (packageName, className)
                    break
        # for className
        if dupProblems:
            print('\n%s\n%s\n' % (filename, dupProblems))
    # for filename
    if options.dups:
        for name, fileSet in sorted (classDict.items()):
            if len (fileSet) < 2:
                continue
            print(name)
            for filename in sorted (fileSet):
                print("   %s" % filename)
            print("")
00244 
00245 
def searchDuplicatePlugins (edmpluginFile):
    """ Searches the edmpluginFile to find any duplicate plugins.

    Each line of the file is split on whitespace; the first field is
    taken as the library that provides a plugin and the second field
    as the plugin name.  Every name provided by two or more different
    libraries (and not listed in ignoreEdmDP) is printed, followed by
    the libraries that provide it.

    The file is read directly rather than through a shell pipeline:
    counting with "uniq -c | grep '2 '" only reported names whose
    count ended in 2, and names holding shell or regex
    metacharacters could not be looked up reliably.
    """
    pluginLibs = {}
    try:
        cacheFile = open (edmpluginFile)
    except IOError as detail:
        print("Cannot read '%s': %s" % (edmpluginFile, detail))
        return
    try:
        for line in cacheFile:
            fields = line.split()
            if len (fields) < 2:
                # blank or malformed line
                continue
            pluginLibs.setdefault (fields[1], set()).add (fields[0])
    finally:
        cacheFile.close()
    for plugin in sorted (pluginLibs):
        libs = pluginLibs[plugin]
        if len (libs) < 2 or plugin in ignoreEdmDP:
            continue
        print(plugin)
        for lib in sorted (libs):
            print("   **" + lib + "**")
        print("")
00261 
def searchEdmPluginDump (edmpluginFile, srcDir):
    """ Searches the edmpluginFile to find any duplicate Reflex
    definitions.

    edmpluginFile defaults to
    '<release base>/lib/$SCRAM_ARCH/.edmplugincache' and srcDir to
    '<release base>/src' when they are empty.  The current working
    directory is changed to srcDir.  Duplicate plugins are reported
    first (searchDuplicatePlugins).  Then every Reflex name that occurs
    more than once, or every name matching options.searchFor when that
    is given, is printed together with the XML file (see getXmlName)
    of each library that provides it.

    The module-level list packageREs is extended as a side effect.
    Raises RuntimeError when a default cannot be determined or srcDir
    is not a valid directory.
    """
    global packageREs
    if not edmpluginFile:
        try:
            edmpluginFile = getReleaseBaseDir() + '/lib/' + \
                            os.environ.get('SCRAM_ARCH') + '/.edmplugincache'
        except TypeError:
            # one of the environment variables is missing
            raise RuntimeError (
                  "$CMSSW_RELEASE_BASE or $SCRAM_ARCH not found.")
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    searchDuplicatePlugins (edmpluginFile)
    # one pattern per top-level entry of srcDir:
    # <package><rest of the library name>
    for package in commands.getoutput ('ls -1').split ('\n'):
        if not package:
            # an empty name would yield a pattern that matches anything
            continue
        packageREs.append( re.compile( r'^(' + re.escape (package) +
                                       r')(\S+)$') )
    # optional user-supplied search patterns (pipe separated)
    searchREs = []
    doSearch = False
    if options.searchFor:
        doSearch = True
        fixSpacesRE = re.compile (r'\s+')
        for word in options.searchFor.split ('|'):
            searchREs.append( re.compile (fixSpacesRE.sub (r'.*', word)) )
    problemSet = set()
    prevLine = ''
    cmd = "grep Reflex %s | awk '{print $2}' | sort" % edmpluginFile
    for line in commands.getoutput (cmd).split ('\n'):
        if not line:
            # blank output line; as a grep pattern it would match
            # every line of the cache file
            continue
        if doSearch:
            for regex in searchREs:
                if regex.search (line):
                    problemSet.add (line)
                    break
        else:
            # the list is sorted, so duplicates are adjacent
            if line == prevLine and line not in ignoreEdmDP:
                problemSet.add (line)
            prevLine = line
    # Look up in which libraries the problems are found
    pluginCapRE = re.compile (r'plugin(\S+?)Capabilities.so')
    lcgReflexRE = re.compile (r'^LCGReflex/')
    for problem in sorted (problemSet):
        # Unbackwhacked stars will mess with the grep command.  So
        # let's fix them now and not worry about it
        fixedProblem = problem.replace ('*', '\\*')
        cmd = 'grep "%s" %s | awk \'{print $1}\'' % (fixedProblem,
                                                     edmpluginFile)
        output = commands.getoutput (cmd).split ('\n')
        # drop the 'LCGReflex/' prefix and turn '%' into spaces for
        # display
        print(lcgReflexRE.sub ('', problem).replace ('%', ' '))
        for line in output:
            match = pluginCapRE.match (line)
            if match:
                line = match.group (1)
            print("   %s" % getXmlName (line))
        print("")
00337 
def getXmlName (line):
    """Given a line from EDM plugin dump, try to get XML file name.

    Lookup order: the packageMatchDict cache, the packageREs patterns
    (accepted only if the resulting classes_def.xml exists), a 'find'
    for classes_def.xml paths containing the line, and finally a
    'find' for BuildFile files that mention it.  Successful lookups
    are stored in packageMatchDict.  When nothing is found, or the
    line is empty, the line is returned wrapped in '**'.
    """
    global packageMatchDict
    notFound = "**" + line + "**"
    if not line:
        # an empty pattern would produce malformed grep commands below
        return notFound
    cached = packageMatchDict.get (line)
    if cached:
        return cached
    for regex in packageREs:
        match = regex.search (line)
        if not match:
            continue
        xmlFile = "./%s/%s/src/classes_def.xml" % \
                  (match.group(1), match.group(2))
        if os.path.exists (xmlFile):
            packageMatchDict [line] = xmlFile
            return xmlFile
    # If we're here, then we haven't been successful yet.  Let's try
    # the brute force approach.
    # NOTE(review): 'line' is inserted into the shell commands
    # unquoted - presumably it is always a plain library name; confirm.
    # Try 1: a single classes_def.xml whose path contains the line
    cmd = 'find . -name classes_def.xml -print | grep %s' % line
    output = commands.getoutput (cmd).split ('\n')
    if len (output) == 1 and output[0]:
        packageMatchDict [line] = output[0]
        return output[0]
    # Try 2: a single BuildFile that mentions the line
    cmd = 'find . -name "BuildFile" -exec grep -q %s {} \\; -print' % line
    output = commands.getoutput (cmd).split ('\n')
    if len (output) == 1 and output[0]:
        found = output[0] + ' (%s)' % line
        packageMatchDict [line] = found
        return found
    return notFound
00373     
00374 
00375 
# Compiled package-name patterns; filled in by searchEdmPluginDump and
# read by getXmlName.
packageREs = []
# Cache of getXmlName results, keyed by the line that was looked up.
packageMatchDict = {}
00378 
if __name__ == "__main__":
    # Command-line entry point: build the option parser, parse the
    # arguments into the module-level 'options' used by the search
    # functions, then run the requested searches.
    parser = optparse.OptionParser (
        "Usage: %prog [options]\n"
        "Searches classes_def.xml for duplicate definitions")

    # options for the classes_def.xml search
    xmlGroup = optparse.OptionGroup (parser, "ClassDef XML options")
    xmlGroup.add_option (
        '--dups', dest='dups', action='store_true', default=False,
        help="Search for duplicate definitions")
    xmlGroup.add_option (
        '--lostDefs', dest='lostDefs', action='store_true', default=False,
        help="Looks for definitions in the wrong libraries")
    xmlGroup.add_option (
        '--lazyLostDefs', dest='lazyLostDefs', action='store_true',
        default=False,
        help="Will try to ignore as many lost defs as reasonable")
    xmlGroup.add_option (
        '--verbose', dest='verbose', action='store_true', default=False,
        help="Prints out a lot of information")
    xmlGroup.add_option (
        '--showXMLs', dest='showXMLs', action='store_true', default=False,
        help="Shows all 'classes_def.xml' files")
    xmlGroup.add_option (
        '--dir', dest='srcdir', type='string', default='',
        help="directory to search for 'classes_def.xml' files "
             "(default: $CMSSW_RELEASE_BASE/src)")

    # options for the EDM plugin dump search
    dumpGroup = optparse.OptionGroup (parser, "EdmPluginDump options")
    dumpGroup.add_option (
        '--edmPD', dest='edmPD', action='store_true', default=False,
        help="Searches EDM Plugin Dump for duplicates")
    dumpGroup.add_option (
        '--edmFile', dest='edmFile', type='string', default='',
        help="EDM Plugin Dump cache file' (default: "
             "$CMSSW_RELEASE_BASE/lib/$SCRAM_ARCH/.edmplugincache)")
    dumpGroup.add_option (
        '--searchFor', dest='searchFor', type='string', default='',
        help="Search EPD for given pipe-separated (|) regexs "
             "instead of duplicates")

    parser.add_option_group (xmlGroup)
    parser.add_option_group (dumpGroup)
    (options, args) = parser.parse_args()

    # Let's go.  The lazy variant implies the lost-definition search.
    if options.lazyLostDefs:
        options.lostDefs = True
    wantXmlSearch = options.showXMLs or options.lostDefs or options.dups
    if wantXmlSearch:
        searchClassDefXml (options.srcdir)
    if options.edmPD:
        searchEdmPluginDump (options.edmFile, options.srcdir)