00001
00002
00003 import optparse
00004 import os
00005 import commands
00006 import re
00007 import sys
00008 import pprint
00009 import commands
00010 import subprocess
00011 from XML2Python import xml2obj
00012
00013
00014
# Canonical spelling (key) <= alternative spellings (values).  Class names
# read from the XML files have all whitespace removed before these aliases
# are rewritten to the canonical form, which is why 'unsignedint' appears
# as an alias of 'unsigned int'.
typedefsDict = {
    'unsigned int' : ['unsignedint', 'UInt32_t', 'uint32_t'],
    'unsigned long': ['unsignedlong'],
    'int'          : ['Int32_t'],
    'float'        : ['Float_t'],
    'double'       : ['Double_t'],
    'char'         : ['Char_t'],
    # Template punctuation is re-spaced.  Each alias is listed only once:
    # a repeated alias would be substituted twice and double the padding.
    # The second alias is the XML-escaped spelling of the same character.
    '< '           : ['<', '&lt;'],
    ' >'           : ['>', '&gt;'],
    ', '           : [','],
}
00028
00029
00030
00031
00032
# Ordered list of single-entry dictionaries {package name : [regex, ...]}.
# A class name matching one of the regular expressions is treated as
# belonging to that package.  It is a list (not one dictionary) because the
# order matters to the code that scans it and because a package name may
# appear more than once ('PatCandidates' does).
equivDict = [
    {'GsfTracking': [
        'reco::GsfTrack(Collection|).*(MomentumConstraint|VertexConstraint)',
        'Trajectory.*reco::GsfTrack',
    ]},
    {'ParallelAnalysis': ['examples::TrackAnalysisAlgorithm']},
    {'PatCandidates': ['pat::PATObject', 'pat::Lepton']},
    {'BTauReco': [
        'reco::SoftLeptonProperties',
        'reco::SecondaryVertexTagInfo',
    ]},
    {'CastorReco': ['reco::CastorJet']},
    {'JetMatching': ['reco::JetFlavour', 'reco::MatchedPartons']},
    {'TrackingAnalysis': ['TrackingParticle']},
    {'Egamma': ['reco::ElectronID']},
    {'TopObjects': ['reco::CATopJetProperties']},
    {'TauReco': [
        'reco::L2TauIsolationInfo',
        'reco::RecoTauPiZero',
        'reco::BaseTau',
    ]},
    {'ValidationFormats': ['PGlobalDigi::.+', 'PGlobalRecHit::.+']},
    {'TrajectorySeed': ['TrajectorySeed']},
    {'TrackCandidate': ['TrackCandidate']},
    {'PatternTools': [
        'MomentumConstraint',
        'VertexConstraint',
        'Trajectory',
    ]},
    {'TrackerRecHit2D': [
        'SiStrip(Matched|)RecHit[12]D',
        'SiTrackerGSRecHit[12]D',
        'SiPixelRecHit',
    ]},
    {'MuonReco': ['reco::Muon(Ref|)(Vector|)']},
    {'MuonSeed': ['L3MuonTrajectorySeed']},
    {'HepMCCandidate': ['reco::GenParticle.*']},
    {'L1Trigger': ['l1extra::L1.+Particle']},
    {'TrackInfo': ['reco::TrackingRecHitInfo']},
    {'EgammaCandidates': ['reco::GsfElectron.*', 'reco::Photon.*']},
    {'HcalIsolatedTrack': [
        'reco::IsolatedPixelTrackCandidate',
        'reco::EcalIsolatedParticleCandidate',
    ]},
    {'HcalRecHit': ['HFRecHit', 'HORecHit', 'ZDCRecHit', 'HBHERecHit']},
    {'PFRootEvent': ['EventColin::']},
    {'CaloTowers': ['CaloTower.*']},
    {'GsfTrackReco': ['GsfTrack.*']},
    {'METReco': ['reco::(Calo|PF|Gen|)MET', 'reco::PFClusterMET']},
    {'ParticleFlowReco': ['reco::RecoPFClusterRefCandidateRef.*']},
    {'ParticleFlowCandidate': [
        'reco::PFCandidateRef',
        'reco::PFCandidateFwdRef',
    ]},
    {'PhysicsToolsObjects': ['PhysicsTools::Calibration']},
    {'RecoCandidate': ['reco::Candidate']},
    {'TrackReco': ['reco::Track']},
    {'VertexReco': ['reco::Vertex']},
    {'TFWLiteSelectorTest': ['tfwliteselectortest']},
    {'PatCandidates': [
        'reco::RecoCandidate',
        'pat::[A-Za-z]+Ref(Vector|)',
    ]},
    {'JetReco': ['reco::.*Jet', 'reco::.*Jet(Collection|Ref)']},
]
00072
# Plugin names that are never reported as duplicates.  Only membership is
# tested, so every value is simply 1.  The empty string is listed so that
# an empty line of command output is skipped as well.
ignoreEdmDP = dict.fromkeys (
    (
        'LCGReflex/__gnu_cxx::__normal_iterator<std::basic_string<char>*,std::vector<std::basic_string<char>%>%>',
        '',
    ),
    1)
00077
def getReleaseBaseDir ():
    """ Return $CMSSW_RELEASE_BASE, or $CMSSW_BASE when the former is
    unset or empty (i.e. depending on whether we are in a development
    area or a release area).

    Returns None when $CMSSW_RELEASE_BASE is unset/empty and $CMSSW_BASE
    is unset, and the empty string when $CMSSW_BASE is set but empty.
    Callers concatenate a path suffix to the result, so None surfaces
    there as a TypeError, which they turn into a RuntimeError."""
    # 'or' covers both "unset" (None) and "set but empty" (''); calling
    # len() on the result of environ.get() would raise for the unset case
    # and never reach the fallback.
    return os.environ.get ('CMSSW_RELEASE_BASE') or \
           os.environ.get ('CMSSW_BASE')
00085
00086
def _findClassDefXmlFiles ():
    """ Returns the sorted, './'-prefixed paths of every file below the
    current directory whose name ends in 'classes_def.xml'."""
    xmlFiles = []
    for dirPath, _dirNames, fileNames in os.walk ('.'):
        for fileName in fileNames:
            if fileName.endswith ('classes_def.xml'):
                xmlFiles.append (os.path.join (dirPath, fileName))
    xmlFiles.sort()
    return xmlFiles


def _makeTypedefNormalizer (typedefs):
    """ Returns a function that rewrites every alias listed in 'typedefs'
    ({canonical spelling : [aliases]}) to its canonical spelling.

    All aliases are matched as literal text in a single pass, longest
    first, so the result does not depend on dictionary ordering
    ('UInt32_t' is never mangled by the shorter 'Int32_t'), replaced text
    is never rewritten again, and an alias listed twice is applied once."""
    canonicalFor = {}
    for canonical, aliases in typedefs.items():
        for alias in aliases:
            if alias:
                canonicalFor.setdefault (alias, canonical)
    if not canonicalFor:
        return lambda name: name
    longestFirst = sorted (canonicalFor, key = len, reverse = True)
    aliasRE = re.compile ('|'.join ([re.escape (alias)
                                     for alias in longestFirst]))
    def normalize (name):
        return aliasRE.sub (lambda match: canonicalFor[match.group (0)],
                            name)
    return normalize


def searchClassDefXml (srcDir):
    """ Searches through the requested directory looking at
    'classes_def.xml' files looking for duplicate Reflex definitions.

    srcDir: directory to search; when empty, '<release base>/src' as
            given by getReleaseBaseDir() is used.  The process changes
            its working directory to it.

    Behaviour is steered by the module-level 'options' object:
      showXMLs     - pretty-print the list of XML files found
      lostDefs     - report classes that look like they belong to another
                     package (by package name or by 'equivDict' pattern)
      lazyLostDefs - additionally ignore classes that merely mention a
                     non-templated class defined in the same file
      dups         - report class names defined in two or more files
      verbose      - print progress information

    Raises RuntimeError when no source directory can be determined or it
    cannot be entered."""
    spacesRE       = re.compile (r'\s+')
    stdRE          = re.compile (r'std::')
    srcClassNameRE = re.compile (r'(\w+)/src/classes_def\.xml')
    ignoreSrcRE    = re.compile (r'.*/FWCore/Skeletons/scripts/mkTemplates/.+')
    braketRE       = re.compile (r'<.+>')
    normalizeTypedefs = _makeTypedefNormalizer (typedefsDict)

    # get the source directory we want
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            # no release base available (None cannot be concatenated)
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    print ("Searching for 'classes_def.xml' in '%s'." % srcDir)
    xmlFiles = _findClassDefXmlFiles()

    if options.showXMLs:
        pprint.pprint (xmlFiles)
    # template files of the skeleton generator are not real definitions
    xmlFiles = [xmlFile for xmlFile in xmlFiles
                if not ignoreSrcRE.match (xmlFile)]

    # (compiled pattern, owning package) for every entry of equivDict
    explicitREs = []
    for item in equivDict:
        for pack, equivs in item.items():
            for equiv in equivs:
                explicitREs.append ((re.compile (r'\b' + equiv + r'\b'),
                                     pack))

    # figure out the package names and the patterns that count as "this
    # package" for each of them
    xmlPackages = []
    packagesREs = {}
    equivREs    = {}
    if options.lostDefs:
        for filename in xmlFiles:
            match = srcClassNameRE.search (filename)
            if not match:
                continue
            packageName = match.group (1)
            xmlPackages.append (packageName)
            packagesREs[packageName] = \
                re.compile (r'\b' + packageName + r'\b')
            equivList = equivREs.setdefault (packageName, [])
            for item in equivDict:
                for equiv in item.get (packageName, []):
                    equivList.append ((re.compile (r'\b' + equiv + r'\b'),
                                       equiv))
            # the package's own name always goes last
            equivList.append ((packagesREs[packageName], packageName))

    classDict = {}
    ncdict = {'class' : 'className'}
    for filename in xmlFiles:
        dupProblems     = []
        regexList       = []
        localObjects    = []
        simpleObjectREs = []
        if options.lostDefs:
            lostMatch = srcClassNameRE.search (filename)
            if not lostMatch:
                continue
            exceptName = lostMatch.group (1)
            regexList = equivREs[exceptName]
            # sanity check: the last pattern is the package's own name
            if not regexList[-1][0].search (exceptName):
                print ('%s not found in %s' % (exceptName, regexList[-1][0]))
                sys.exit()
        if options.verbose:
            print ("filename %s" % filename)
        try:
            xmlObj = xml2obj (filename = filename,
                              filtering = True,
                              nameChangeDict = ncdict)
        except Exception as detail:
            print ("File %s is malformed XML. Please fix." % filename)
            print ("  %s" % detail)
            continue
        # The class list lives either below <selection> or at top level.
        # How xml2obj signals a missing attribute is not visible here, so
        # any ordinary exception is taken to mean "not present".
        try:
            classList = xmlObj.selection.className
        except Exception:
            try:
                classList = xmlObj.className
            except Exception:
                # this isn't a real classes_def.xml file.  Skip it
                print ("**** SKIPPING '%s' - Doesn't seem to have proper information." % filename)
                continue
        for piece in classList:
            try:
                className = spacesRE.sub ('', piece.name)
            except Exception:
                # entry without a usable name (e.g. a class pattern)
                continue
            className = normalizeTypedefs (stdRE.sub ('', className))
            classDict.setdefault (className, set()).add (filename)
            # should we check for lost definitions?
            if not options.lostDefs:
                continue
            localObjects.append (className)
            if options.lazyLostDefs and not braketRE.search (className):
                # escaped: a class name is literal text, not a pattern
                matchString = r'\b' + re.escape (className) + r'\b'
                simpleObjectREs.append ((re.compile (matchString),
                                         className))
        for className in localObjects:
            # if we see our name (or equivalent) here, then let's
            # skip complaining about this
            foundEquiv = False
            for equivRE, _equivName in regexList:
                if equivRE.search (className):
                    foundEquiv = True
                    break
            for simpleRE, simpleName in simpleObjectREs:
                if simpleRE.search (className):
                    foundEquiv = True
                    if options.verbose and simpleName != className:
                        print (" Using %s to ignore %s"
                               % (simpleName, className))
                    break
            if foundEquiv:
                continue
            for explicitRE, pack in explicitREs:
                if explicitRE.search (className):
                    dupProblems.append (" %s : %s\n" % (pack, className))
                    foundEquiv = True
                    break
            if foundEquiv:
                continue
            for packageName in xmlPackages:
                if packagesREs[packageName].search (className):
                    dupProblems.append (" %s : %s\n"
                                        % (packageName, className))
                    break
        if dupProblems:
            print ('\n%s\n%s\n' % (filename, ''.join (dupProblems)))

    if options.dups:
        for name in sorted (classDict):
            fileSet = classDict[name]
            if len (fileSet) < 2:
                continue
            print (name)
            for filename in sorted (fileSet):
                print ("  %s" % filename)
            print ("")
00242
00243
00244
00245
00246
def searchDuplicatePlugins (edmpluginFile):
    """ Searches the edmpluginFile to find any duplicate
    plugins.

    Each line of the file is split on whitespace; the first field is taken
    as the plugin library and the second as the plugin name.  Every plugin
    name provided by two or more different libraries (and not listed in
    'ignoreEdmDP') is printed, followed by the sorted libraries providing
    it and a blank line.

    The file is parsed directly rather than through a shell pipeline, so
    names defined in any number (>= 2) of libraries are reported, names
    are compared literally, and the file name needs no shell quoting.

    Raises IOError/OSError when the file cannot be read."""
    librariesByPlugin = {}
    with open (edmpluginFile) as cacheFile:
        for line in cacheFile:
            fields = line.split()
            if len (fields) < 2:
                continue
            librariesByPlugin.setdefault (fields[1], set()).add (fields[0])
    for pluginName in sorted (librariesByPlugin):
        libraries = librariesByPlugin[pluginName]
        if len (libraries) < 2 or pluginName in ignoreEdmDP:
            continue
        print (pluginName)
        for library in sorted (libraries):
            print (" **" + library + "**")
        print ("")
00262
def searchEdmPluginDump (edmpluginFile, srcDir):
    """ Searches the edmpluginFile to find any duplicate Reflex
    definitions.

    edmpluginFile: plugin cache file; when empty,
                   '<release base>/lib/$SCRAM_ARCH/.edmplugincache'.
    srcDir:        source directory; when empty, '<release base>/src'.
                   The process changes its working directory to it.

    After running searchDuplicatePlugins() on the file, the second
    whitespace-separated field of every line mentioning 'Reflex' is
    collected.  With 'options.searchFor' set, names matching any of its
    '|'-separated regular expressions (whitespace in them matches
    anything) are reported; otherwise names occurring on more than one
    line and not listed in 'ignoreEdmDP' are.  For each reported name the
    first field of every line carrying exactly that name is mapped through
    getXmlName() and printed.

    Side effect: one pattern per entry of srcDir is appended to the
    module-level 'packageREs' list used by getXmlName().

    Raises RuntimeError when the defaults cannot be determined or srcDir
    cannot be entered, IOError/OSError when the cache file is unreadable."""
    if not edmpluginFile:
        try:
            edmpluginFile = getReleaseBaseDir() + '/lib/' + \
                            os.environ.get ('SCRAM_ARCH') + '/.edmplugincache'
        except TypeError:
            # one of the pieces is None and cannot be concatenated
            raise RuntimeError (
                "$CMSSW_RELEASE_BASE or $SCRAM_ARCH not found.")
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except TypeError:
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except OSError:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    searchDuplicatePlugins (edmpluginFile)

    # A plugin is named <package><subpackage>; remember a pattern for each
    # visible top-level entry so getXmlName() can split the two again.
    # Directory names are escaped because they are literal text.
    for package in sorted (os.listdir ('.')):
        if package.startswith ('.'):
            continue
        packageREs.append (re.compile (r'^(' + re.escape (package)
                                       + r')(\S+)$'))

    searchREs = []
    if options.searchFor:
        for word in options.searchFor.split ('|'):
            searchREs.append (re.compile (re.sub (r'\s+', '.*', word)))

    # Read the cache once; only lines with a library and a name are useful.
    with open (edmpluginFile) as cacheFile:
        entries = [line.split() for line in cacheFile]
    entries = [fields for fields in entries if len (fields) >= 2]
    reflexNames = [fields[1] for fields in entries
                   if any ('Reflex' in field for field in fields)]

    problemSet = set()
    if searchREs:
        for name in reflexNames:
            if any (regex.search (name) for regex in searchREs):
                problemSet.add (name)
    else:
        seenNames = set()
        for name in reflexNames:
            if name in seenNames and name not in ignoreEdmDP:
                problemSet.add (name)
            seenNames.add (name)

    pluginCapRE = re.compile (r'plugin(\S+?)Capabilities\.so')
    reflexPrefix = 'LCGReflex/'
    for problem in sorted (problemSet):
        # display form: no 'LCGReflex/' prefix, '%' shown as a blank
        shownName = problem
        if shownName.startswith (reflexPrefix):
            shownName = shownName[len (reflexPrefix):]
        print (shownName.replace ('%', ' '))
        for fields in entries:
            # exact comparison: the name is neither a pattern nor a prefix
            if fields[1] != problem:
                continue
            library = fields[0]
            match = pluginCapRE.match (library)
            if match:
                library = match.group (1)
            print ("  %s" % getXmlName (library))
        print ("")
00338
def _pathsNamed (fileName):
    """ Yields the './'-prefixed path of every file called 'fileName'
    below the current directory."""
    for dirPath, _dirNames, fileNames in os.walk ('.'):
        if fileName in fileNames:
            yield os.path.join (dirPath, fileName)


def _fileContains (path, text):
    """ Tells whether the file at 'path' contains 'text'; unreadable
    files count as not containing it."""
    try:
        with open (path) as handle:
            return text in handle.read()
    except (IOError, OSError, UnicodeError):
        return False


def getXmlName (line):
    """Given a line from EDM plugin dump, try to get XML file name.

    Tried in order (paths are relative to the current directory):
      1. the cached answer in 'packageMatchDict';
      2. './<package>/<subpackage>/src/classes_def.xml' for the first
         pattern in 'packageREs' that splits the name into an existing
         file;
      3. the only 'classes_def.xml' whose path contains the name;
      4. the only 'BuildFile' whose contents contain the name, returned
         as '<path> (<name>)'.
    Successful answers are cached.  When nothing (or nothing unique) is
    found, '**<name>**' is returned.

    The name is always compared as literal text inside Python, so shell
    or regex metacharacters in it are harmless and no diagnostic output
    of an external command can be mistaken for a file name."""
    cached = packageMatchDict.get (line)
    if cached:
        return cached
    for regex in packageREs:
        match = regex.search (line)
        if match:
            xmlFile = "./%s/%s/src/classes_def.xml" % \
                      (match.group (1), match.group (2))
            if os.path.exists (xmlFile):
                packageMatchDict[line] = xmlFile
                return xmlFile
    if line:
        # Not a conventional package name: look for a unique XML file
        # whose path mentions it ...
        xmlMatches = [path for path in _pathsNamed ('classes_def.xml')
                      if line in path]
        if len (xmlMatches) == 1:
            packageMatchDict[line] = xmlMatches[0]
            return xmlMatches[0]
        # ... or a unique BuildFile that mentions it.
        buildMatches = [path for path in _pathsNamed ('BuildFile')
                        if _fileContains (path, line)]
        if len (buildMatches) == 1:
            retval = buildMatches[0] + ' (%s)' % line
            packageMatchDict[line] = retval
            return retval
    return "**" + line + "**"
00374
00375
00376
# Module-level state shared by searchEdmPluginDump() and getXmlName():
# compiled patterns, one per package, appended by searchEdmPluginDump()
packageREs = list()
# cache filled by getXmlName(): looked-up name -> answer returned for it
packageMatchDict = dict()
00379
if __name__ == "__main__":
    # Command-line entry point.  'options' is deliberately left at module
    # level: the search functions above read it as a global.
    parser = optparse.OptionParser (
        "Usage: %prog [options]\n"
        "Searches classes_def.xml for duplicate "
        "definitions")
    xmlGroup  = optparse.OptionGroup (parser, "ClassDef XML options")
    dumpGroup = optparse.OptionGroup (parser, "EdmPluginDump options")

    # keyword sets shared by the on/off switches and by the text options
    boolFlag = dict (action = 'store_true', default = False)
    textArg  = dict (type = 'string', default = '')

    # options steering the classes_def.xml search
    xmlGroup.add_option ('--dups', dest = 'dups',
                         help = "Search for duplicate definitions",
                         **boolFlag)
    xmlGroup.add_option ('--lostDefs', dest = 'lostDefs',
                         help = "Looks for definitions in the wrong libraries",
                         **boolFlag)
    xmlGroup.add_option ('--lazyLostDefs', dest = 'lazyLostDefs',
                         help = "Will try to ignore as many lost defs as reasonable",
                         **boolFlag)
    xmlGroup.add_option ('--verbose', dest = 'verbose',
                         help = "Prints out a lot of information",
                         **boolFlag)
    xmlGroup.add_option ('--showXMLs', dest = 'showXMLs',
                         help = "Shows all 'classes_def.xml' files",
                         **boolFlag)
    xmlGroup.add_option ('--dir', dest = 'srcdir',
                         help = "directory to search for 'classes_def.xml'"
                                " files (default: $CMSSW_RELEASE_BASE/src)",
                         **textArg)

    # options steering the EDM plugin dump search
    dumpGroup.add_option ('--edmPD', dest = 'edmPD',
                          help = "Searches EDM Plugin Dump for duplicates",
                          **boolFlag)
    dumpGroup.add_option ('--edmFile', dest = 'edmFile',
                          help = "EDM Plugin Dump cache file'"
                                 " (default: $CMSSW_RELEASE_BASE/lib/"
                                 "$SCRAM_ARCH/.edmplugincache)",
                          **textArg)
    dumpGroup.add_option ('--searchFor', dest = 'searchFor',
                          help = "Search EPD for given pipe-separated (|) regexs"
                                 " instead of duplicates",
                          **textArg)

    for group in (xmlGroup, dumpGroup):
        parser.add_option_group (group)
    options, args = parser.parse_args()

    # --lazyLostDefs is a variant of --lostDefs, so it switches that on too
    options.lostDefs = options.lostDefs or options.lazyLostDefs

    wantsXmlSearch = options.showXMLs or options.lostDefs or options.dups
    if wantsXmlSearch:
        searchClassDefXml (options.srcdir)
    if options.edmPD:
        searchEdmPluginDump (options.edmFile, options.srcdir)