CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/Validation/RecoTau/Tools/MultipleCompare.py

Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 
00003 import FWCore.ParameterSet.Config as cms
00004 import sys
00005 import os
00006 import math
00007 import re
00008 import Validation.RecoTau.RecoTauValidation_cfi as validation
00009 from optparse import OptionParser
00010 from ROOT import *
00011 
00012 __author__  = "Mauro Verzetti (mauro.verzetti@cern.ch)"
00013 __doc__ = """Script to plot the content of a Validation .root file and compare it to a different file:\n\n
00014 Usage: MultipleCompare.py -T testFile -R refFile [options] [search strings that you want to apply '*' is supported as special character]"""
00015 
00016 def LoadCommandlineOptions(argv):
00017   sys.argv = argv
00018   parser = OptionParser(description=__doc__)
00019   parser.add_option('--myhelp',metavar='', action="store_true",help='prints this output message',dest='help',default = False)
00020   parser.add_option('--TestFile','-T',metavar='testFile', type=str,help='Sets the test file',dest='test',default = '')
00021   parser.add_option('--RefFile','-R',metavar='refFile', type=str,help='Sets the reference file',dest='ref',default = None)
00022   parser.add_option('--output','-o',metavar='outputFile', type=str,help='Sets the output file',dest='out',default = 'MultipleCompare.png')
00023   parser.add_option('--logScale',action="store_true", dest="logScale", default=False, help="Sets the log scale in the plot")
00024   parser.add_option('--fakeRate','-f',action="store_true", dest="fakeRate", default=False, help="Sets the fake rate options and put the correct label (implies --logScale)")
00025   parser.add_option('--testLabel','-t',metavar='testLabel', type=str,help='Sets the label to put in the plots for test file',dest='testLabel',default = None)
00026   parser.add_option('--refLabel','-r',metavar='refLabel', type=str,help='Sets the label to put in the plots for ref file',dest='refLabel',default = None)
00027   parser.add_option('--maxLog',metavar='number', type=float,help='Sets the maximum of the scale in log scale (requires --logScale or -f to work)',dest='maxLog',default = 3)
00028   parser.add_option('--minDiv',metavar='number', type=float,help='Sets the minimum of the scale in the ratio pad',dest='minDiv',default = 0.001)
00029   parser.add_option('--maxDiv',metavar='number', type=float,help='Sets the maximum of the scale in the ratio pad',dest='maxDiv',default = 2)
00030   parser.add_option('--logDiv',action="store_true", dest="logDiv", default=False, help="Sets the log scale in the plot")
00031   parser.add_option('--normalize',action="store_true", dest="normalize", default=False, help="plot normalized")
00032   parser.add_option('--maxRange',metavar='number',type=float, dest="maxRange", default=1.6, help="Sets the maximum range in linear plots")
00033   parser.add_option('--rebin', dest="rebin", type=int, default=-1, help="Sets the rebinning scale")
00034   parser.add_option('--branding','-b',metavar='branding', type=str,help='Define a branding to label the plots (in the top right corner)',dest='branding',default = None)
00035   #parser.add_option('--search,-s',metavar='searchStrings', type=str,help='Sets the label to put in the plots for ref file',dest='testLabel',default = None) No idea on how to tell python to use all the strings before a new option, thus moving this from option to argument (but may be empty)  
00036   
00037   (options,toPlot) = parser.parse_args()
00038   if options.help:
00039     parser.print_help()
00040     sys.exit(0)
00041   return [options, toPlot]
00042 
00043 def GetContent(dir):
00044     tempList = dir.GetListOfKeys()
00045     retList = []
00046     for it in range(0,tempList.GetSize()):
00047        retList.append(tempList.At(it).ReadObj())
00048     return retList
00049 
00050 def MapDirStructure( directory, dirName, objectList ):
00051     dirContent = GetContent(directory)
00052     for entry in dirContent:
00053         if type(entry) is TDirectory or type(entry) is TDirectoryFile:
00054             subdirName = os.path.join(dirName,entry.GetName())
00055             MapDirStructure(entry, subdirName,objectList)
00056         else:
00057             pathname = os.path.join(dirName,entry.GetName())
00058             objectList.append(pathname)
00059 
00060 def Match(required, got):
00061     for part in required.split('*'):
00062         if got.find(part) == -1:
00063             return False
00064     return True
00065 
00066 def Divide(hNum,hDen):
00067     ret = hNum.Clone('Division')
00068     ret.GetYaxis().SetTitle('Ratio')
00069     for binI in range(hNum.GetNbinsX()):
00070         denVal = hDen.GetBinContent(binI)
00071         denErr = hDen.GetBinError(binI)
00072         numErr = hNum.GetBinError(binI)
00073         numVal = hNum.GetBinContent(binI)
00074         if denVal == 0:
00075             ret.SetBinContent(binI,0)
00076             ret.SetBinError(binI,0)
00077         else:
00078             ret.SetBinContent(binI,numVal/denVal)
00079             if numVal==0:
00080                 ret.SetBinError(binI,1)
00081             else:
00082                 ret.SetBinError(binI,(numVal/denVal)*math.sqrt(math.pow(numErr/numVal,2) + math.pow(denErr/denVal,2) ) )
00083     return ret
00084 
00085 def DetermineHistType(name):
00086   #automatically derive all plot types in the future?
00087   type = ''
00088   label = ''
00089   prefix = ''
00090   #assuming plots name like: tauType_plotType_xAxis or tauType_plotType_selection
00091   matches = re.match(r'.*/(.*)_(.*)_(.*)', name)
00092   if matches:
00093     prefix = matches.group(1) 
00094     label = matches.group(3)
00095     knowntypes = (['pTRatio','SumPt','Size'])
00096     for knowntype in knowntypes:
00097       if matches.group(2) == knowntype:
00098         type = knowntype
00099     if not type:  #there are plots labelled ..._vs_...
00100       type = 'Eff'
00101   else:
00102     type = 'Eff'
00103 
00104   prefixParts = prefix.partition('Discrimination')
00105   if prefixParts[2] != '':
00106     prefix = prefixParts[2]
00107     prefixParts = prefix.partition('By')
00108     if prefixParts[2] != '':
00109       prefix = prefixParts[2]
00110 
00111   #print 'type is ' + type
00112   return [type, label, prefix]
00113 
00114 def DrawTitle(text):
00115         title = TLatex()
00116         title.SetNDC()
00117         title.SetTextAlign(12)#3*10=right,3*1=top
00118         title.SetTextSize(.035) 
00119         leftMargin = gStyle.GetPadLeftMargin()
00120         topMargin = 1 - 0.5*gStyle.GetPadTopMargin()
00121         title.DrawLatex(leftMargin, topMargin, text)
00122 
00123 def DrawBranding(options, label=''):
00124   if options.branding != None or label != '':
00125     text = TLatex()
00126     text.SetNDC();
00127     text.SetTextAlign(11)#3*10=right,3*1=top
00128     text.SetTextSize(.025)
00129     text.SetTextColor(13)
00130     if options.out.find(".eps")!=-1:
00131       text.SetTextAngle(-91.0)#eps BUG
00132     else:
00133       text.SetTextAngle(-90.0)
00134     rightMargin = 1 - gStyle.GetPadRightMargin()
00135     topMargin = 1 - gStyle.GetPadTopMargin()
00136     if label!='':
00137       label += ': '
00138     text.DrawLatex(rightMargin+.01, topMargin+0.025, label+options.branding);
00139 
00140 
00141 def FindParents(histoPath):
00142     root = histoPath[:histoPath.find('_')]
00143     par = histoPath[histoPath.find('Eff')+3:]
00144     validationPlots = validation.proc.efficiencies.plots._Parameterizable__parameterNames
00145     found =0
00146     num = ''
00147     den = ''
00148     for efficiency in validationPlots:
00149         effpset = getattr(validation.proc.efficiencies.plots,efficiency)
00150         effName = effpset.efficiency.value()
00151         effNameCut = effName[effName.find('_'):effName.find('#')]
00152         if effNameCut in histoPath:
00153             if found == 1:
00154                 print 'More than one pair of parents found for ' + histopath + ':'
00155                 assert(False)
00156             num = root + effpset.numerator.value()[effName.find('_'):].replace('#PAR#',par)
00157             den = root + effpset.denominator.value()[effName.find('_'):].replace('#PAR#',par)
00158             found += 1
00159     return [num,den]
00160 
00161 def Rebin(tfile, histoPath, rebinVal):
00162     parents = FindParents(histoPath)
00163     num = tfile.Get(parents[0])
00164     if type(num) != TH1F:
00165         print 'Looking for '+num
00166         print 'Plot now found! What the hell are you doing? Exiting...'
00167         sys.exit()
00168     denSingle = tfile.Get(parents[1])
00169     if type(denSingle) != TH1F:
00170         print 'Looking for '+denSingle
00171         print 'Plot now found! What the hell are you doing? Exiting...'
00172         sys.exit()
00173     num.Rebin(rebinVal)
00174     den = denSingle.Rebin(rebinVal,'denClone')
00175     retVal = num.Clone(histoPath+'Rebin%s'%rebinVal)
00176     #print 'Num : ' + parents[0]
00177     #print 'Den : ' +parents[1]
00178     #print "NumBins: %s DenBins: %s" % (num.GetNbinsX(), den.GetNbinsX() )
00179     retVal.Divide(num,den,1,1,'B')
00180     return retVal
00181 
00182 def findRange(hists, min0=-1, max0=-1):
00183   if len(hists) < 1:
00184     return
00185   #auto ranges if no user value provided
00186   min = min0
00187   max = max0
00188   if min0 == -1 or max0 == -1:
00189     for hist in hists:
00190       if min0 == -1:
00191         #Divide() sets bin to zero if division not possible. Ignore these bins.
00192         minTmp = getMinimumIncludingErrors(hist)
00193         if minTmp < min or min == -1:
00194           min = minTmp
00195       if max0 == -1:
00196         maxTmp = getMaximumIncludingErrors(hist)
00197         if maxTmp > max or max == -1:
00198           max = maxTmp
00199   return [min, max]
00200 
00201 def optimizeRangeMainPad(argv, pad, hists):
00202   pad.Update()
00203   if pad.GetLogy() and argv.count('maxLog') > 0:
00204     maxLog = options.maxLog
00205   else:
00206     maxLog = -1
00207   min, max = findRange(hists, -1, maxLog)
00208   if pad.GetLogy():
00209     if min == 0:
00210       min = 0.001
00211     if max < 2:
00212       max = 2. #prefere fixed range for easy comparison
00213   else:
00214     if min < 0.7:
00215       min = 0. #start from zero if possible
00216     if max <= 1.1 and max > 0.7:
00217       max = 1.2 #prefere fixed range for easy comparison
00218   hists[0].SetAxisRange(min, max, "Y")
00219 
00220 def optimizeRangeSubPad(argv, hists):
00221   min = -1
00222   max = -1
00223   if argv.count('minDiv') > 0:
00224     min = options.minDiv
00225   if argv.count('maxDiv') > 0:
00226     max = options.maxDiv
00227   min, max = findRange(hists, min, max)
00228   if max > 2:
00229     max = 2 #maximal bound
00230   hists[0].SetAxisRange(min, max, "Y")
00231 
00232 
00233 def getMaximumIncludingErrors(hist):
00234 #find maximum considering also the errors
00235   distance = 1.
00236   max = -1
00237   pos = 0
00238   for i in range(1, hist.GetNbinsX()):
00239     if hist.GetBinContent(i) > max:#ignore errors here
00240       max = hist.GetBinContent(i)
00241       pos = i
00242   return max + distance*hist.GetBinError(pos)
00243 
00244 def getMinimumIncludingErrors(hist):
00245   #find minimum considering also the errors
00246   #ignoring zero bins
00247   distance = 1.
00248   min = -1
00249   pos = 0
00250   for i in range(1, hist.GetNbinsX()):
00251     if hist.GetBinContent(i)<=0.:#ignore errors here
00252       continue
00253     if hist.GetBinContent(i) < min or min==-1:
00254       min = hist.GetBinContent(i)
00255       pos = i
00256       if min < 0:
00257         min = 0  
00258   return min - distance*hist.GetBinError(pos)
00259 
00260 
00261 def main(argv=None):
00262   if argv is None:
00263     argv = sys.argv
00264 
00265   options, toPlot = LoadCommandlineOptions(argv)
00266 
00267   gROOT.SetStyle('Plain')
00268   gROOT.SetBatch()
00269   gStyle.SetPalette(1)
00270   gStyle.SetOptStat(0)
00271   gStyle.SetPadGridX(True)
00272   gStyle.SetPadGridY(True)
00273   gStyle.SetOptTitle(0)
00274   gStyle.SetPadTopMargin(0.1)
00275   gStyle.SetPadBottomMargin(0.1)
00276   gStyle.SetPadLeftMargin(0.13)
00277   gStyle.SetPadRightMargin(0.07)
00278 
00279 
00280   testFile = TFile(options.test)
00281   refFile = None
00282   if options.ref != None:
00283     refFile = TFile(options.ref)
00284 
00285   #Takes the position of all plots that were produced
00286   plotList = []
00287   MapDirStructure( testFile,'',plotList)
00288 
00289   histoList = []
00290   for plot in toPlot:
00291     for path in plotList:
00292         if Match(plot.lower(),path.lower()):
00293             histoList.append(path)
00294 
00295 #  print "options: ",options
00296 #  print "toPlot: ",toPlot
00297   print histoList
00298 
00299   if len(histoList)<1:
00300     print '\tError: Please specify at least one histogram.'
00301     if len(toPlot)>0:
00302       print 'Check your plot list:', toPlot
00303     sys.exit()
00304 
00305 
00306   #WARNING: For now the hist type is assumed to be constant over all histos.
00307   histType, label, prefix = DetermineHistType(histoList[0])
00308   #decide whether or not to scale to an integral of 1
00309   #should usually not be used most plot types. they are already normalized.
00310   scaleToIntegral = False
00311   if options.normalize:
00312     scaleToIntegral = True
00313 
00314   ylabel = 'Efficiency'
00315 
00316   if options.fakeRate:
00317     ylabel = 'Fake rate'
00318 
00319   drawStats = False
00320   if histType=='pTRatio' and len(histoList)<3:
00321     drawStats = True
00322 
00323   #legend = TLegend(0.6,0.83,0.6+0.39,0.83+0.17)
00324   x1 = 0.55
00325   x2 = 1-gStyle.GetPadRightMargin()
00326   y2 = 1-gStyle.GetPadTopMargin()
00327   lineHeight = .025
00328   if len(histoList) == 1:
00329     lineHeight = .05
00330   y1 = y2 - lineHeight*len(histoList)
00331   legend = TLegend(x1,y1,x2,y2)
00332   #legend.SetHeader(label)
00333   legend.SetFillColor(0)
00334   if drawStats:
00335     y2 = y1
00336     y1 = y2 - .07*len(histoList)
00337     statsBox = TPaveText(x1,y1,x2,y2,"NDC")
00338     statsBox.SetFillColor(0)
00339     statsBox.SetTextAlign(12)#3*10=right,3*1=top
00340     statsBox.SetMargin(0.05)
00341     statsBox.SetBorderSize(1)
00342 
00343     
00344   canvas = TCanvas('MultiPlot','MultiPlot',validation.standardDrawingStuff.canvasSizeX.value(),832)
00345   effPad = TPad('effPad','effPad',0,0.25,1.,1.,0,0)
00346   effPad.SetBottomMargin(0.1);
00347   effPad.SetTopMargin(0.1);
00348   effPad.SetLeftMargin(0.13);
00349   effPad.SetRightMargin(0.07);
00350   effPad.Draw()
00351   header = ''
00352   if options.testLabel != None:
00353     header += 'Dots: '+options.testLabel
00354   if options.refLabel != None:
00355     header += ' Line: '+options.refLabel
00356   DrawTitle(header)
00357   DrawBranding(options)
00358   diffPad = TPad('diffPad','diffPad',0.,0.,1,.25,0,0)
00359   diffPad.Draw()
00360   colors = [2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1]
00361   first = True
00362   divHistos = []
00363   statTemplate = '%s Mean: %.3f RMS: %.3f'
00364   testHs = []
00365   refHs = []
00366   for histoPath,color in zip(histoList,colors):
00367     if(options.rebin == -1):
00368       testH = testFile.Get(histoPath)
00369     else:
00370         testH = Rebin(testFile,histoPath,options.rebin)
00371     if type(testH) != TH1F:
00372         print 'Looking for '+histoPath
00373         print 'Test plot now found! What the hell are you doing? Exiting...'
00374         sys.exit()
00375     testHs.append(testH)
00376     xAx = histoPath[histoPath.find('Eff')+len('Eff'):]
00377     effPad.cd()
00378     if not testH.GetXaxis().GetTitle():  #only overwrite label if none already existing
00379       if hasattr(validation.standardDrawingStuff.xAxes,xAx):
00380         testH.GetXaxis().SetTitle( getattr(validation.standardDrawingStuff.xAxes,xAx).xAxisTitle.value())
00381     if not testH.GetYaxis().GetTitle():  #only overwrite label if none already existing
00382       testH.GetYaxis().SetTitle(ylabel)
00383     if label!='':
00384       testH.GetXaxis().SetTitle(label+': '+testH.GetXaxis().GetTitle())
00385     testH.GetXaxis().SetTitleOffset(1.1)
00386     testH.GetYaxis().SetTitleOffset(1.5)
00387     testH.SetMarkerSize(1)
00388     testH.SetMarkerStyle(20)
00389     testH.SetMarkerColor(color)
00390     if histType == 'Eff':
00391       legend.AddEntry(testH,histoPath[histoPath.rfind('/')+1:histoPath.find(histType)],'p')
00392     else:
00393       legend.AddEntry(testH,DetermineHistType(histoPath)[2],'p')
00394     if drawStats:
00395         text = statsBox.AddText(statTemplate % ('Dots',testH.GetMean(), testH.GetRMS()) )
00396         text.SetTextColor(color)
00397     if first:
00398         first = False
00399         if options.logScale:
00400             effPad.SetLogy()
00401         if scaleToIntegral:
00402           if testH.GetEntries() > 0:
00403             if not testH.GetSumw2N():
00404               testH.Sumw2()
00405               testH.DrawNormalized('ex0 P')
00406             else:
00407               print "--> Warning! You tried to normalize a histogram which seems to be already scaled properly. Draw it unscaled."
00408               scaleToIntegral = False
00409               testH.Draw('ex0')
00410         else:
00411           testH.Draw('ex0')
00412     else:
00413         if scaleToIntegral:
00414           if testH.GetEntries() > 0:
00415             testH.DrawNormalized('same p')
00416         else:
00417             testH.Draw('same ex0 l')
00418     if refFile == None:
00419         continue
00420     if(options.rebin == -1):
00421         refH = refFile.Get(histoPath)
00422     else:
00423         refH = Rebin(refFile,histoPath,options.rebin)
00424     if type(refH) != TH1F:
00425         continue
00426     refHs.append(refH)
00427     refH.SetLineColor(color)
00428     refH.SetLineWidth(1)
00429     if scaleToIntegral:
00430       if testH.GetEntries() > 0:
00431         refH.DrawNormalized('same hist')
00432     else:
00433         refH.DrawCopy('same hist')
00434     if drawStats:
00435       text = statsBox.AddText(statTemplate % ('Line',refH.GetMean(), refH.GetRMS()) )
00436       text.SetTextColor(color)
00437     refH.SetFillColor(color)
00438     refH.SetFillStyle(3001)
00439     if scaleToIntegral:
00440         entries = testH.GetEntries()
00441         if entries > 0:
00442           testH.Scale(1./entries)
00443         entries = refH.GetEntries()
00444         refH.Sumw2()
00445         if entries > 0:
00446           refH.Scale(1./entries)
00447     refH.Draw('same e3')
00448     divHistos.append(Divide(testH,refH))
00449 
00450   tmpHists = []
00451   tmpHists.extend(testHs)
00452   tmpHists.extend(refHs)
00453   optimizeRangeMainPad(argv, effPad, tmpHists)
00454   
00455   firstD = True
00456   if refFile != None:
00457     for histo,color in zip(divHistos,colors):
00458         diffPad.cd()
00459         histo.SetMarkerSize(1)
00460         histo.SetMarkerStyle(20)
00461         histo.SetMarkerColor(color)
00462         histo.GetYaxis().SetLabelSize(0.08)
00463         histo.GetYaxis().SetTitleOffset(0.6)
00464         histo.GetYaxis().SetTitleSize(0.08)
00465         histo.GetXaxis().SetLabelSize(0.)
00466         histo.GetXaxis().SetTitleSize(0.)
00467         if firstD:
00468             if options.logDiv:
00469                 diffPad.SetLogy()
00470             histo.Draw('ex0')
00471             firstD = False
00472         else:
00473             histo.Draw('same ex0')
00474             diffPad.Update()
00475     optimizeRangeSubPad(argv, divHistos)
00476 
00477     effPad.cd()
00478   legend.Draw()
00479   if drawStats:
00480     statsBox.Draw()
00481   canvas.Print(options.out)
00482 
00483 
00484 if __name__ == '__main__':
00485   sys.exit(main())