00001
00002
00003 import optparse
00004 import os
00005 import commands
00006 import re
00007 import sys
00008 import pprint
00009 import commands
00010 import subprocess
00011 from XML2Python import xml2obj
00012
00013
00014
# Class-name normalisation table applied by searchClassDefXml().
#   key   : canonical spelling written into the class name
#   value : spellings that are rewritten to the key; each one is handed to
#           re.sub() and is therefore interpreted as a regular expression
# searchClassDefXml() strips all whitespace from a class name before using
# this table, so the last three entries put back uniform spacing around
# template brackets and commas.
typedefsDict = {
    'unsigned int':  ['unsignedint', 'UInt32_t', 'uint32_t'],
    'unsigned long': ['unsignedlong'],
    'int':           ['Int32_t'],
    'float':         ['Float_t'],
    'double':        ['Double_t'],
    'char':          ['Char_t'],
    # Each bracket is listed once as the raw character and once as its XML
    # entity.  Listing the raw character twice would run the substitution
    # twice and pad the bracket with two spaces instead of one.
    '< ':            ['<', '&lt;'],
    ' >':            ['>', '&gt;'],
    ', ':            [','],
}
00028
00029
00030
00031
00032
# Ordered list of single-key dictionaries {package name: [regex fragments]}.
# searchClassDefXml() compiles every fragment between word boundaries and
# walks the list in order, stopping at the first fragment that matches a
# class name.  A package may appear more than once (PatCandidates does), so
# this stays a list rather than one dictionary.
equivDict = [
    {"GsfTracking": [
        "reco::GsfTrack(Collection|).*(MomentumConstraint|VertexConstraint)",
        "Trajectory.*reco::GsfTrack",
    ]},
    {"ParallelAnalysis": ["examples::TrackAnalysisAlgorithm"]},
    {"PatCandidates": ["pat::PATObject", "pat::Lepton"]},
    {"BTauReco": [
        "reco::SoftLeptonProperties",
        "reco::SecondaryVertexTagInfo",
    ]},
    {"CastorReco": ["reco::CastorJet"]},
    {"JetMatching": ["reco::JetFlavour", "reco::MatchedPartons"]},
    {"TrackingAnalysis": ["TrackingParticle"]},
    {"Egamma": ["reco::ElectronID"]},
    {"TopObjects": ["reco::CATopJetProperties"]},
    {"TauReco": [
        "reco::L2TauIsolationInfo",
        "reco::RecoTauPiZero",
        "reco::BaseTau",
    ]},
    {"ValidationFormats": ["PGlobalDigi::.+", "PGlobalRecHit::.+"]},
    {"TrajectorySeed": ["TrajectorySeed"]},
    {"TrackCandidate": ["TrackCandidate"]},
    {"PatternTools": [
        "MomentumConstraint",
        "VertexConstraint",
        "Trajectory",
    ]},
    {"TrackerRecHit2D": [
        "SiStrip(Matched|)RecHit[12]D",
        "SiTrackerGSRecHit[12]D",
        "SiPixelRecHit",
    ]},
    {"MuonReco": ["reco::Muon(Ref|)(Vector|)"]},
    {"MuonSeed": ["L3MuonTrajectorySeed"]},
    {"HepMCCandidate": ["reco::GenParticle.*"]},
    {"L1Trigger": ["l1extra::L1.+Particle"]},
    {"TrackInfo": ["reco::TrackingRecHitInfo"]},
    {"EgammaCandidates": ["reco::GsfElectron.*", "reco::Photon.*"]},
    {"HcalIsolatedTrack": [
        "reco::IsolatedPixelTrackCandidate",
        "reco::EcalIsolatedParticleCandidate",
    ]},
    {"HcalRecHit": ["HFRecHit", "HORecHit", "ZDCRecHit", "HBHERecHit"]},
    {"PFRootEvent": ["EventColin::"]},
    {"CaloTowers": ["CaloTower.*"]},
    {"GsfTrackReco": ["GsfTrack.*"]},
    {"METReco": ["reco::(Calo|PF|Gen|)MET", "reco::PFClusterMET"]},
    {"ParticleFlowCandidate": [
        "reco::PFCandidateRef",
        "reco::PFCandidateFwdRef",
    ]},
    {"PhysicsToolsObjects": ["PhysicsTools::Calibration"]},
    {"RecoCandidate": ["reco::Candidate"]},
    {"TrackReco": ["reco::Track"]},
    {"VertexReco": ["reco::Vertex"]},
    {"TFWLiteSelectorTest": ["tfwliteselectortest"]},
    {"PatCandidates": [
        "reco::RecoCandidate",
        "pat::[A-Za-z]+Ref(Vector|)",
    ]},
    {"JetReco": ["reco::.*Jet", "reco::.*Jet(Collection|Ref)"]},
]
00071
# Capability names from the EDM plugin cache that are never reported as
# duplicates.  Only the keys are consulted; every value is 1.  The empty
# string is listed so that blank lines in the shell output are skipped.
ignoreEdmDP = dict.fromkeys(
    [
        'LCGReflex/__gnu_cxx::__normal_iterator<std::basic_string<char>*,std::vector<std::basic_string<char>%>%>',
        '',
    ],
    1
)
00076
def getReleaseBaseDir ():
    """Return the base directory of the CMSSW area in use.

    $CMSSW_RELEASE_BASE is preferred.  When it is unset or empty the value
    of $CMSSW_BASE is returned instead.

    Returns None when no usable value is found, so callers that concatenate
    the result must be prepared for that.
    """
    # 'or' treats a missing variable (None) and an empty one ('') alike;
    # calling len() on the lookup result would raise TypeError when the
    # variable is missing and the fallback would never be tried.
    return os.environ.get('CMSSW_RELEASE_BASE') or os.environ.get('CMSSW_BASE')
00084
00085
def searchClassDefXml (srcDir):
    """Scan 'classes_def.xml' files for duplicate or misplaced definitions.

    srcDir -- directory to search; when empty, '<release base>/src' as
              given by getReleaseBaseDir() is used.

    The function changes the working directory to srcDir, so every file
    name it prints is relative to that directory.  Its behaviour is driven
    by the module-level 'options' object:
      showXMLs     -- pretty-print the list of XML files that were found
      lostDefs     -- report classes that look as if they belong to a
                      different package than the one defining them
      lazyLostDefs -- additionally let non-template class names defined in
                      the same file excuse other names that contain them
      dups         -- list every class name defined in two or more files
      verbose      -- print progress information

    Raises RuntimeError when the release area cannot be determined or when
    srcDir cannot be entered.
    """
    spacesRE       = re.compile (r'\s+')
    stdRE          = re.compile (r'std::')
    srcClassNameRE = re.compile (r'(\w+)/src/classes_def.xml')
    ignoreSrcRE    = re.compile (r'.*/FWCore/Skeletons/scripts/mkTemplates/.+')
    braketRE       = re.compile (r'<.+>')

    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except Exception:
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except Exception:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    print ("Searching for 'classes_def.xml' in '%s'." % srcDir)
    xmlFiles = commands.getoutput \
               ('find . -name "*classes_def.xml" -print').split ('\n')

    if options.showXMLs:
        pprint.pprint (xmlFiles)

    # Drop blank output lines and the skeleton templates once, up front.
    candidateFiles = [name for name in xmlFiles
                      if name and not ignoreSrcRE.match (name)]

    # Patterns from equivDict, tried in table order for every class.
    explicitREs = []
    for item in equivDict:
        for pack in item:
            for equiv in item[pack]:
                explicitREs.append( (re.compile (r'\b' + equiv + r'\b'), pack) )

    # Per-package patterns; only needed when looking for lost definitions.
    xmlPackages = []
    packagesREs = {}
    equivREs    = {}
    if options.lostDefs:
        for filename in candidateFiles:
            match = srcClassNameRE.search (filename)
            if not match:
                continue
            packageName = match.group (1)
            xmlPackages.append (packageName)
            packagesREs[packageName] = re.compile (r'\b' + packageName + r'\b')
            equivList = equivREs.setdefault (packageName, [])
            for item in equivDict:
                for equiv in item.get (packageName, []):
                    equivList.append( (re.compile (r'\b' + equiv + r'\b'),
                                       equiv) )
            # The package's own name always goes last in its list.
            equivList.append( (packagesREs[packageName], packageName) )

    classDict = {}
    ncdict    = {'class' : 'className'}
    for filename in candidateFiles:
        dupProblems     = ''
        regexList       = []
        localObjects    = []
        simpleObjectREs = []
        if options.lostDefs:
            lostMatch = srcClassNameRE.search (filename)
            if not lostMatch:
                continue
            exceptName = lostMatch.group (1)
            regexList  = equivREs[exceptName]
            # Sanity check: the last pattern is the package's own name and
            # therefore has to match it.
            ownRE = regexList[-1][0]
            if not ownRE.search (exceptName):
                print ('%s not found in %s' % (exceptName, ownRE))
                sys.exit()
        if options.verbose:
            print ("filename %s" % filename)
        try:
            xmlObj = xml2obj (filename       = filename,
                              filtering      = True,
                              nameChangeDict = ncdict)
        except Exception as detail:
            print ("File %s is malformed XML. Please fix." % filename)
            print ("  %s" % (detail,))
            continue
        # The class list is looked up below <selection> first and at the
        # top level second.
        try:
            classList = xmlObj.selection.className
        except Exception:
            try:
                classList = xmlObj.className
            except Exception:
                print ("**** SKIPPING '%s' - Doesn't seem to have proper information." % filename)
                continue
        for piece in classList:
            try:
                className = spacesRE.sub ('', piece.name)
            except Exception:
                # No usable 'name' on this entry; skip it.
                continue
            className = stdRE.sub ('', className)
            # Replace typedef spellings by their canonical form.
            for typedef, tdList in typedefsDict.items():
                for alias in tdList:
                    className = re.sub (alias, typedef, className)
            classDict.setdefault (className, set()).add (filename)

            if not options.lostDefs:
                continue
            localObjects.append (className)
            if options.lazyLostDefs and not braketRE.search (className):
                simpleObjectREs.append( (re.compile (r'\b' + className + r'\b'),
                                         className) )
        for className in localObjects:
            # A class is fine when it mentions its own package (or an
            # equivalent of it) ...
            foundEquiv = False
            for equivRE in regexList:
                if equivRE[0].search (className):
                    foundEquiv = True
                    break
            # ... or, in lazy mode, a plain class from the same file.
            for simpleRE in simpleObjectREs:
                if simpleRE[0].search (className):
                    foundEquiv = True
                    if options.verbose and simpleRE[1] != className:
                        print (" Using %s to ignore %s"
                               % (simpleRE[1], className))
                    break
            if foundEquiv:
                continue
            # Otherwise the first explicit pattern that matches is reported.
            for exRes in explicitREs:
                if exRes[0].search (className):
                    dupProblems += " %s : %s\n" % (exRes[1], className)
                    foundEquiv = True
                    break
            if foundEquiv:
                continue
            # Last resort: the name of any other known package.
            for packageName in xmlPackages:
                if packagesREs[packageName].search (className):
                    dupProblems += " %s : %s\n" % (packageName, className)
                    break

        if dupProblems:
            print ('\n%s\n%s\n' % (filename, dupProblems))

    if options.dups:
        for name, fileSet in sorted (classDict.items()):
            if len (fileSet) < 2:
                continue
            print (name)
            for filename in sorted (fileSet):
                print ("  %s" % filename)
            print ("")
00241
00242
00243
00244
00245
def searchDuplicatePlugins (edmpluginFile):
    """Print every capability that is provided by more than one plugin.

    edmpluginFile -- path of the EDM plugin cache file.  The second
                     whitespace-separated column of each line is used as
                     the capability name and the first as the plugin that
                     provides it.

    For each duplicated name that is not listed in ignoreEdmDP, the name is
    printed followed by the plugins providing it and a blank line.
    """
    # Reduce the file to distinct (name, plugin) pairs, keep the names, and
    # let 'uniq -d' emit each name that occurs two or more times.  Filtering
    # the 'uniq -c' counts with grep '2 ' would only keep counts ending in
    # the digit 2 and so miss names provided by three plugins.
    cmd = "cat %s | awk '{print $2\" \"$1}' | sort | uniq | awk '{print $1}' | sort | uniq -d" % edmpluginFile
    for name in commands.getoutput (cmd).split ('\n'):
        if name in ignoreEdmDP:
            continue
        # Stars are regex operators for grep, so they are escaped in the
        # search pattern only; the heading shows the name unmodified.
        pattern = name.replace ("*", r"\*")
        cmd = "cat %s | grep ' %s ' | awk '{print $1}' | sort | uniq " % (edmpluginFile, pattern)
        plugins = commands.getoutput (cmd).split ('\n')
        print (name)
        for plugin in plugins:
            if plugin:
                print (" **" + plugin + "**")
        print ("")
00261
def searchEdmPluginDump (edmpluginFile, srcDir):
    """Search the EDM plugin cache for duplicate Reflex definitions.

    edmpluginFile -- path of the plugin cache file; when empty,
                     '<release base>/lib/$SCRAM_ARCH/.edmplugincache' is used.
    srcDir        -- source directory; when empty, '<release base>/src'.

    The function changes the working directory to srcDir, runs
    searchDuplicatePlugins(), and adds one pattern per directory entry of
    srcDir to the module-level list packageREs (used by getXmlName()).
    It then collects the Reflex entries of the cache that either occur more
    than once or, when options.searchFor is set, match one of its
    '|'-separated expressions, and prints each of them together with the
    XML file (as resolved by getXmlName()) of every plugin mentioning it.

    Raises RuntimeError when a default location cannot be determined or
    when srcDir cannot be entered.
    """
    if not edmpluginFile:
        try:
            edmpluginFile = getReleaseBaseDir() + '/lib/' + \
                            os.environ.get ('SCRAM_ARCH') + '/.edmplugincache'
        except Exception:
            raise RuntimeError \
                  ("$CMSSW_RELEASE_BASE or $SCRAM_ARCH not found.")
    if not srcDir:
        try:
            srcDir = getReleaseBaseDir() + '/src'
        except Exception:
            raise RuntimeError ("$CMSSW_RELEASE_BASE not found.")
    try:
        os.chdir (srcDir)
    except Exception:
        raise RuntimeError ("'%s' is not a valid directory." % srcDir)
    searchDuplicatePlugins (edmpluginFile)

    # One pattern per directory entry: group 1 is the entry name, group 2
    # whatever follows it in the plugin name.
    for package in commands.getoutput ('ls -1').split ('\n'):
        if not package:
            # An empty name would compile to a pattern matching anything.
            continue
        # Names are matched literally, so regex metacharacters are escaped.
        packageREs.append( re.compile (r'^(' + re.escape (package) + r')(\S+)$') )

    searchREs = []
    doSearch  = False
    if options.searchFor:
        doSearch    = True
        fixSpacesRE = re.compile (r'\s+')
        for word in options.searchFor.split ('|'):
            # Whitespace in a search term stands for "anything in between".
            searchREs.append( re.compile (fixSpacesRE.sub (r'.*', word)) )

    problemSet = set()
    prevLine   = ''
    cmd = "grep Reflex %s | awk '{print $2}' | sort" % edmpluginFile
    for line in commands.getoutput (cmd).split ('\n'):
        if doSearch:
            if any (regex.search (line) for regex in searchREs):
                problemSet.add (line)
        else:
            # The input is sorted, so duplicates sit on adjacent lines.
            if line == prevLine and line not in ignoreEdmDP:
                problemSet.add (line)
            prevLine = line

    pluginCapRE = re.compile (r'plugin(\S+?)Capabilities.so')
    lcgReflexRE = re.compile (r'^LCGReflex/')
    for problem in sorted (problemSet):
        # Unescaped stars would be read as regex operators by grep.
        fixedProblem = problem.replace ('*', r'\*')
        cmd = 'grep "%s" %s | awk \'{print $1}\'' % (fixedProblem,
                                                     edmpluginFile)
        output = commands.getoutput (cmd).split ('\n')
        # Display form: no 'LCGReflex/' prefix, '%' shown as a blank.
        problem = lcgReflexRE.sub ('', problem).replace ('%', ' ')
        print (problem)

        for line in output:
            match = pluginCapRE.match (line)
            if match:
                line = match.group (1)
            print ("  %s" % getXmlName (line))
        print ("")
00337
def getXmlName (line):
    """Given a line from EDM plugin dump, try to get XML file name.

    line -- plugin name (or raw first column) taken from the plugin cache.

    Lookup order:
      1. the module-level cache packageMatchDict;
      2. the patterns in packageREs, accepting
         './<group 1>/<group 2>/src/classes_def.xml' when that file exists;
      3. a 'find' for classes_def.xml files whose path contains the name,
         accepted only when exactly one is found;
      4. a 'find' for BuildFile files that mention the name, accepted only
         when exactly one is found; the result is suffixed with ' (<name>)'.
    When everything fails the name is returned wrapped in '**'.  Paths are
    relative to the current working directory.
    """
    fallback = "**" + line + "**"
    if not line:
        # Nothing to look up.  An empty name would also leave the grep
        # commands below without a pattern, and 'grep -q <file>' would then
        # wait for input on stdin.
        return fallback

    cached = packageMatchDict.get (line)
    if cached:
        return cached

    for regex in packageREs:
        match = regex.search (line)
        if match:
            xmlFile = "./%s/%s/src/classes_def.xml" % \
                      (match.group (1), match.group (2))
            if os.path.exists (xmlFile):
                packageMatchDict[line] = xmlFile
                return xmlFile

    # Brute force from here on.  The name is single-quoted for the shell and
    # passed with '-e' so that it is always taken as the pattern.
    quoted = "'" + line.replace ("'", "'\\''") + "'"

    # Try 1: a classes_def.xml whose path contains the name.
    cmd = 'find . -name classes_def.xml -print | grep -e %s' % quoted
    output = commands.getoutput (cmd).split ('\n')
    if len (output) == 1 and output[0]:
        packageMatchDict[line] = output[0]
        return output[0]

    # Try 2: a BuildFile that mentions the name.
    cmd = 'find . -name "BuildFile" -exec grep -q -e %s {} \\; -print' % quoted
    output = commands.getoutput (cmd).split ('\n')
    if len (output) == 1 and output[0]:
        retval = output[0] + ' (%s)' % line
        packageMatchDict[line] = retval
        return retval

    # Remember the failure too, so the two tree walks above are not repeated
    # for the same name.
    packageMatchDict[line] = fallback
    return fallback
00373
00374
00375
# Compiled patterns, one per directory entry of the source tree; filled by
# searchEdmPluginDump() and read by getXmlName().
packageREs = []
# Cache of getXmlName() results, keyed by the plugin name that was looked up.
packageMatchDict = {}

if __name__ == "__main__":
    # 'options' is deliberately left at module level: the search functions
    # above read it as a global.
    parser = optparse.OptionParser ("Usage: %prog [options]\n"
                                    "Searches classes_def.xml for duplicate "
                                    "definitions")
    xmlGroup  = optparse.OptionGroup (parser, "ClassDef XML options")
    dumpGroup = optparse.OptionGroup (parser, "EdmPluginDump options")
    xmlGroup.add_option ('--dups', dest='dups', action='store_true',
                         default=False,
                         help="Search for duplicate definitions")
    xmlGroup.add_option ('--lostDefs', dest='lostDefs', action='store_true',
                         default=False,
                         help="Looks for definitions in the wrong libraries")
    xmlGroup.add_option ('--lazyLostDefs', dest='lazyLostDefs',
                         action='store_true',
                         default=False,
                         help="Will try to ignore as many lost defs as reasonable")
    xmlGroup.add_option ('--verbose', dest='verbose',
                         action='store_true',
                         default=False,
                         help="Prints out a lot of information")
    xmlGroup.add_option ('--showXMLs', dest='showXMLs', action='store_true',
                         default=False,
                         help="Shows all 'classes_def.xml' files")
    xmlGroup.add_option ('--dir', dest='srcdir', type='string', default='',
                         help="directory to search for 'classes_def.xml'"
                         " files (default: $CMSSW_RELEASE_BASE/src)")
    dumpGroup.add_option ('--edmPD', dest='edmPD', action='store_true',
                          default=False,
                          help="Searches EDM Plugin Dump for duplicates")
    dumpGroup.add_option ('--edmFile', dest='edmFile', type='string',
                          default='',
                          help="EDM Plugin Dump cache file"
                          " (default: $CMSSW_RELEASE_BASE/lib/"
                          "$SCRAM_ARCH/.edmplugincache)")
    dumpGroup.add_option ('--searchFor', dest='searchFor', type='string',
                          default='',
                          help="Search EPD for given pipe-separated (|) regexs"
                          " instead of duplicates")
    parser.add_option_group (xmlGroup)
    parser.add_option_group (dumpGroup)
    (options, args) = parser.parse_args()

    # The lazy variant is a refinement of --lostDefs and implies it.
    if options.lazyLostDefs:
        options.lostDefs = True

    xmlRequested = options.showXMLs or options.lostDefs or options.dups
    if xmlRequested:
        searchClassDefXml (options.srcdir)
    if options.edmPD:
        searchEdmPluginDump (options.edmFile, options.srcdir)
    if not (xmlRequested or options.edmPD):
        # No action was selected; show the usage instead of exiting silently.
        parser.print_help()