CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MultipleCompare.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 import FWCore.ParameterSet.Config as cms
4 import sys
5 import os
6 import math
7 import re
9 from optparse import OptionParser
10 from ROOT import *
11 
12 __author__ = "Mauro Verzetti (mauro.verzetti@cern.ch)"
13 __doc__ = """Script to plot the content of a Validation .root file and compare it to a different file:\n\n
14 Usage: MultipleCompare.py -T testFile -R refFile [options] [search strings that you want to apply '*' is supported as special character]"""
15 
17  sys.argv = argv
18  parser = OptionParser(description=__doc__)
19  parser.add_option('--myhelp',metavar='', action="store_true",help='prints this output message',dest='help',default = False)
20  parser.add_option('--TestFile','-T',metavar='testFile', type=str,help='Sets the test file',dest='test',default = '')
21  parser.add_option('--RefFile','-R',metavar='refFile', type=str,help='Sets the reference file',dest='ref',default = None)
22  parser.add_option('--output','-o',metavar='outputFile', type=str,help='Sets the output file',dest='out',default = 'MultipleCompare.png')
23  parser.add_option('--logScale',action="store_true", dest="logScale", default=False, help="Sets the log scale in the plot")
24  parser.add_option('--fakeRate','-f',action="store_true", dest="fakeRate", default=False, help="Sets the fake rate options and put the correct label (implies --logScale)")
25  parser.add_option('--testLabel','-t',metavar='testLabel', type=str,help='Sets the label to put in the plots for test file',dest='testLabel',default = None)
26  parser.add_option('--refLabel','-r',metavar='refLabel', type=str,help='Sets the label to put in the plots for ref file',dest='refLabel',default = None)
27  parser.add_option('--maxLog',metavar='number', type=float,help='Sets the maximum of the scale in log scale (requires --logScale or -f to work)',dest='maxLog',default = 3)
28  parser.add_option('--minDiv',metavar='number', type=float,help='Sets the minimum of the scale in the ratio pad',dest='minDiv',default = 0.001)
29  parser.add_option('--maxDiv',metavar='number', type=float,help='Sets the maximum of the scale in the ratio pad',dest='maxDiv',default = 2)
30  parser.add_option('--logDiv',action="store_true", dest="logDiv", default=False, help="Sets the log scale in the plot")
31  parser.add_option('--normalize',action="store_true", dest="normalize", default=False, help="plot normalized")
32  parser.add_option('--maxRange',metavar='number',type=float, dest="maxRange", default=1.6, help="Sets the maximum range in linear plots")
33  parser.add_option('--rebin', dest="rebin", type=int, default=-1, help="Sets the rebinning scale")
34  parser.add_option('--branding','-b',metavar='branding', type=str,help='Define a branding to label the plots (in the top right corner)',dest='branding',default = None)
35  #parser.add_option('--search,-s',metavar='searchStrings', type=str,help='Sets the label to put in the plots for ref file',dest='testLabel',default = None) No idea on how to tell python to use all the strings before a new option, thus moving this from option to argument (but may be empty)
36 
37  (options,toPlot) = parser.parse_args()
38  if options.help:
39  parser.print_help()
40  sys.exit(0)
41  return [options, toPlot]
42 
43 def GetContent(dir):
44  tempList = dir.GetListOfKeys()
45  retList = []
46  for it in range(0,tempList.GetSize()):
47  retList.append(tempList.At(it).ReadObj())
48  return retList
49 
50 def MapDirStructure( directory, dirName, objectList ):
51  dirContent = GetContent(directory)
52  for entry in dirContent:
53  if type(entry) is TDirectory or type(entry) is TDirectoryFile:
54  subdirName = os.path.join(dirName,entry.GetName())
55  MapDirStructure(entry, subdirName,objectList)
56  else:
57  pathname = os.path.join(dirName,entry.GetName())
58  objectList.append(pathname)
59 
60 def Match(required, got):
61  for part in required.split('*'):
62  if got.find(part) == -1:
63  return False
64  return True
65 
66 def Divide(hNum,hDen):
67  ret = hNum.Clone('Division')
68  ret.GetYaxis().SetTitle('Ratio')
69  for binI in range(hNum.GetNbinsX()):
70  denVal = hDen.GetBinContent(binI)
71  denErr = hDen.GetBinError(binI)
72  numErr = hNum.GetBinError(binI)
73  numVal = hNum.GetBinContent(binI)
74  if denVal == 0:
75  ret.SetBinContent(binI,0)
76  ret.SetBinError(binI,0)
77  else:
78  ret.SetBinContent(binI,numVal/denVal)
79  if numVal==0:
80  ret.SetBinError(binI,1)
81  else:
82  ret.SetBinError(binI,(numVal/denVal)*math.sqrt(math.pow(numErr/numVal,2) + math.pow(denErr/denVal,2) ) )
83  return ret
84 
86  #automatically derive all plot types in the future?
87  type = ''
88  label = ''
89  prefix = ''
90  #assuming plots name like: tauType_plotType_xAxis or tauType_plotType_selection
91  matches = re.match(r'.*/(.*)_(.*)_(.*)', name)
92  if matches:
93  prefix = matches.group(1)
94  label = matches.group(3)
95  knowntypes = (['pTRatio','SumPt','Size'])
96  for knowntype in knowntypes:
97  if matches.group(2) == knowntype:
98  type = knowntype
99  if not type: #there are plots labelled ..._vs_...
100  type = 'Eff'
101  else:
102  type = 'Eff'
103 
104  prefixParts = prefix.partition('Discrimination')
105  if prefixParts[2] != '':
106  prefix = prefixParts[2]
107  prefixParts = prefix.partition('By')
108  if prefixParts[2] != '':
109  prefix = prefixParts[2]
110 
111  #print 'type is ' + type
112  return [type, label, prefix]
113 
114 def DrawTitle(text):
115  title = TLatex()
116  title.SetNDC()
117  title.SetTextAlign(12)#3*10=right,3*1=top
118  title.SetTextSize(.035)
119  leftMargin = gStyle.GetPadLeftMargin()
120  topMargin = 1 - 0.5*gStyle.GetPadTopMargin()
121  title.DrawLatex(leftMargin, topMargin, text)
122 
123 def DrawBranding(options, label=''):
124  if options.branding != None or label != '':
125  text = TLatex()
126  text.SetNDC();
127  text.SetTextAlign(11)#3*10=right,3*1=top
128  text.SetTextSize(.025)
129  text.SetTextColor(13)
130  if options.out.find(".eps")!=-1:
131  text.SetTextAngle(-91.0)#eps BUG
132  else:
133  text.SetTextAngle(-90.0)
134  rightMargin = 1 - gStyle.GetPadRightMargin()
135  topMargin = 1 - gStyle.GetPadTopMargin()
136  if label!='':
137  label += ': '
138  text.DrawLatex(rightMargin+.01, topMargin+0.025, label+options.branding);
139 
140 
141 def FindParents(histoPath):
142  root = histoPath[:histoPath.find('_')]
143  par = histoPath[histoPath.find('Eff')+3:]
144  validationPlots = validation.proc.efficiencies.plots._Parameterizable__parameterNames
145  found =0
146  num = ''
147  den = ''
148  for efficiency in validationPlots:
149  effpset = getattr(validation.proc.efficiencies.plots,efficiency)
150  effName = effpset.efficiency.value()
151  effNameCut = effName[effName.find('_'):effName.find('#')]
152  if effNameCut in histoPath:
153  if found == 1:
154  print 'More than one pair of parents found for ' + histopath + ':'
155  assert(False)
156  num = root + effpset.numerator.value()[effName.find('_'):].replace('#PAR#',par)
157  den = root + effpset.denominator.value()[effName.find('_'):].replace('#PAR#',par)
158  found += 1
159  return [num,den]
160 
161 def Rebin(tfile, histoPath, rebinVal):
162  parents = FindParents(histoPath)
163  num = tfile.Get(parents[0])
164  if type(num) != TH1F:
165  print 'Looking for '+num
166  print 'Plot now found! What the hell are you doing? Exiting...'
167  sys.exit()
168  denSingle = tfile.Get(parents[1])
169  if type(denSingle) != TH1F:
170  print 'Looking for '+denSingle
171  print 'Plot now found! What the hell are you doing? Exiting...'
172  sys.exit()
173  num.Rebin(rebinVal)
174  den = denSingle.Rebin(rebinVal,'denClone')
175  retVal = num.Clone(histoPath+'Rebin%s'%rebinVal)
176  #print 'Num : ' + parents[0]
177  #print 'Den : ' +parents[1]
178  #print "NumBins: %s DenBins: %s" % (num.GetNbinsX(), den.GetNbinsX() )
179  retVal.Divide(num,den,1,1,'B')
180  return retVal
181 
182 def findRange(hists, min0=-1, max0=-1):
183  if len(hists) < 1:
184  return
185  #auto ranges if no user value provided
186  min = min0
187  max = max0
188  if min0 == -1 or max0 == -1:
189  for hist in hists:
190  if min0 == -1:
191  #Divide() sets bin to zero if division not possible. Ignore these bins.
192  minTmp = getMinimumIncludingErrors(hist)
193  if minTmp < min or min == -1:
194  min = minTmp
195  if max0 == -1:
196  maxTmp = getMaximumIncludingErrors(hist)
197  if maxTmp > max or max == -1:
198  max = maxTmp
199  return [min, max]
200 
201 def optimizeRangeMainPad(argv, pad, hists):
202  pad.Update()
203  if pad.GetLogy() and argv.count('maxLog') > 0:
204  maxLog = options.maxLog
205  else:
206  maxLog = -1
207  min, max = findRange(hists, -1, maxLog)
208  if pad.GetLogy():
209  if min == 0:
210  min = 0.001
211  if max < 2:
212  max = 2. #prefere fixed range for easy comparison
213  else:
214  if min < 0.7:
215  min = 0. #start from zero if possible
216  if max <= 1.1 and max > 0.7:
217  max = 1.2 #prefere fixed range for easy comparison
218  hists[0].SetAxisRange(min, max, "Y")
219 
220 def optimizeRangeSubPad(argv, hists):
221  min = -1
222  max = -1
223  if argv.count('minDiv') > 0:
224  min = options.minDiv
225  if argv.count('maxDiv') > 0:
226  max = options.maxDiv
227  min, max = findRange(hists, min, max)
228  if max > 2:
229  max = 2 #maximal bound
230  hists[0].SetAxisRange(min, max, "Y")
231 
232 
234 #find maximum considering also the errors
235  distance = 1.
236  max = -1
237  pos = 0
238  for i in range(1, hist.GetNbinsX()):
239  if hist.GetBinContent(i) > max:#ignore errors here
240  max = hist.GetBinContent(i)
241  pos = i
242  return max + distance*hist.GetBinError(pos)
243 
245  #find minimum considering also the errors
246  #ignoring zero bins
247  distance = 1.
248  min = -1
249  pos = 0
250  for i in range(1, hist.GetNbinsX()):
251  if hist.GetBinContent(i)<=0.:#ignore errors here
252  continue
253  if hist.GetBinContent(i) < min or min==-1:
254  min = hist.GetBinContent(i)
255  pos = i
256  if min < 0:
257  min = 0
258  return min - distance*hist.GetBinError(pos)
259 
260 
261 def main(argv=None):
262  if argv is None:
263  argv = sys.argv
264 
265  options, toPlot = LoadCommandlineOptions(argv)
266 
267  gROOT.SetStyle('Plain')
268  gROOT.SetBatch()
269  gStyle.SetPalette(1)
270  gStyle.SetOptStat(0)
271  gStyle.SetPadGridX(True)
272  gStyle.SetPadGridY(True)
273  gStyle.SetOptTitle(0)
274  gStyle.SetPadTopMargin(0.1)
275  gStyle.SetPadBottomMargin(0.1)
276  gStyle.SetPadLeftMargin(0.13)
277  gStyle.SetPadRightMargin(0.07)
278 
279 
280  testFile = TFile(options.test)
281  refFile = None
282  if options.ref != None:
283  refFile = TFile(options.ref)
284 
285  #Takes the position of all plots that were produced
286  plotList = []
287  MapDirStructure( testFile,'',plotList)
288 
289  histoList = []
290  for plot in toPlot:
291  for path in plotList:
292  if Match(plot.lower(),path.lower()):
293  histoList.append(path)
294 
295 # print "options: ",options
296 # print "toPlot: ",toPlot
297  print histoList
298 
299  if len(histoList)<1:
300  print '\tError: Please specify at least one histogram.'
301  if len(toPlot)>0:
302  print 'Check your plot list:', toPlot
303  sys.exit()
304 
305 
306  #WARNING: For now the hist type is assumed to be constant over all histos.
307  histType, label, prefix = DetermineHistType(histoList[0])
308  #decide whether or not to scale to an integral of 1
309  #should usually not be used most plot types. they are already normalized.
310  scaleToIntegral = False
311  if options.normalize:
312  scaleToIntegral = True
313 
314  ylabel = 'Efficiency'
315 
316  if options.fakeRate:
317  ylabel = 'Fake rate'
318 
319  drawStats = False
320  if histType=='pTRatio' and len(histoList)<3:
321  drawStats = True
322 
323  #legend = TLegend(0.6,0.83,0.6+0.39,0.83+0.17)
324  x1 = 0.55
325  x2 = 1-gStyle.GetPadRightMargin()
326  y2 = 1-gStyle.GetPadTopMargin()
327  lineHeight = .025
328  if len(histoList) == 1:
329  lineHeight = .05
330  y1 = y2 - lineHeight*len(histoList)
331  legend = TLegend(x1,y1,x2,y2)
332  #legend.SetHeader(label)
333  legend.SetFillColor(0)
334  if drawStats:
335  y2 = y1
336  y1 = y2 - .07*len(histoList)
337  statsBox = TPaveText(x1,y1,x2,y2,"NDC")
338  statsBox.SetFillColor(0)
339  statsBox.SetTextAlign(12)#3*10=right,3*1=top
340  statsBox.SetMargin(0.05)
341  statsBox.SetBorderSize(1)
342 
343 
344  canvas = TCanvas('MultiPlot','MultiPlot',validation.standardDrawingStuff.canvasSizeX.value(),832)
345  effPad = TPad('effPad','effPad',0,0.25,1.,1.,0,0)
346  effPad.SetBottomMargin(0.1);
347  effPad.SetTopMargin(0.1);
348  effPad.SetLeftMargin(0.13);
349  effPad.SetRightMargin(0.07);
350  effPad.Draw()
351  header = ''
352  if options.testLabel != None:
353  header += 'Dots: '+options.testLabel
354  if options.refLabel != None:
355  header += ' Line: '+options.refLabel
356  DrawTitle(header)
357  DrawBranding(options)
358  diffPad = TPad('diffPad','diffPad',0.,0.,1,.25,0,0)
359  diffPad.Draw()
360  colors = [2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1]
361  first = True
362  divHistos = []
363  statTemplate = '%s Mean: %.3f RMS: %.3f'
364  testHs = []
365  refHs = []
366  for histoPath,color in zip(histoList,colors):
367  if(options.rebin == -1):
368  testH = testFile.Get(histoPath)
369  else:
370  testH = Rebin(testFile,histoPath,options.rebin)
371  if type(testH) != TH1F:
372  print 'Looking for '+histoPath
373  print 'Test plot now found! What the hell are you doing? Exiting...'
374  sys.exit()
375  testHs.append(testH)
376  xAx = histoPath[histoPath.find('Eff')+len('Eff'):]
377  effPad.cd()
378  if not testH.GetXaxis().GetTitle(): #only overwrite label if none already existing
379  if hasattr(validation.standardDrawingStuff.xAxes,xAx):
380  testH.GetXaxis().SetTitle( getattr(validation.standardDrawingStuff.xAxes,xAx).xAxisTitle.value())
381  if not testH.GetYaxis().GetTitle(): #only overwrite label if none already existing
382  testH.GetYaxis().SetTitle(ylabel)
383  if label!='':
384  testH.GetXaxis().SetTitle(label+': '+testH.GetXaxis().GetTitle())
385  testH.GetXaxis().SetTitleOffset(1.1)
386  testH.GetYaxis().SetTitleOffset(1.5)
387  testH.SetMarkerSize(1)
388  testH.SetMarkerStyle(20)
389  testH.SetMarkerColor(color)
390  if histType == 'Eff':
391  legend.AddEntry(testH,histoPath[histoPath.rfind('/')+1:histoPath.find(histType)],'p')
392  else:
393  legend.AddEntry(testH,DetermineHistType(histoPath)[2],'p')
394  if drawStats:
395  text = statsBox.AddText(statTemplate % ('Dots',testH.GetMean(), testH.GetRMS()) )
396  text.SetTextColor(color)
397  if first:
398  first = False
399  if options.logScale:
400  effPad.SetLogy()
401  if scaleToIntegral:
402  if testH.GetEntries() > 0:
403  if not testH.GetSumw2N():
404  testH.Sumw2()
405  testH.DrawNormalized('ex0 P')
406  else:
407  print "--> Warning! You tried to normalize a histogram which seems to be already scaled properly. Draw it unscaled."
408  scaleToIntegral = False
409  testH.Draw('ex0')
410  else:
411  testH.Draw('ex0')
412  else:
413  if scaleToIntegral:
414  if testH.GetEntries() > 0:
415  testH.DrawNormalized('same p')
416  else:
417  testH.Draw('same ex0 l')
418  if refFile == None:
419  continue
420  if(options.rebin == -1):
421  refH = refFile.Get(histoPath)
422  else:
423  refH = Rebin(refFile,histoPath,options.rebin)
424  if type(refH) != TH1F:
425  continue
426  refHs.append(refH)
427  refH.SetLineColor(color)
428  refH.SetLineWidth(1)
429  if scaleToIntegral:
430  if testH.GetEntries() > 0:
431  refH.DrawNormalized('same hist')
432  else:
433  refH.DrawCopy('same hist')
434  if drawStats:
435  text = statsBox.AddText(statTemplate % ('Line',refH.GetMean(), refH.GetRMS()) )
436  text.SetTextColor(color)
437  refH.SetFillColor(color)
438  refH.SetFillStyle(3001)
439  if scaleToIntegral:
440  entries = testH.GetEntries()
441  if entries > 0:
442  testH.Scale(1./entries)
443  entries = refH.GetEntries()
444  refH.Sumw2()
445  if entries > 0:
446  refH.Scale(1./entries)
447  refH.Draw('same e3')
448  divHistos.append(Divide(testH,refH))
449 
450  tmpHists = []
451  tmpHists.extend(testHs)
452  tmpHists.extend(refHs)
453  optimizeRangeMainPad(argv, effPad, tmpHists)
454 
455  firstD = True
456  if refFile != None:
457  for histo,color in zip(divHistos,colors):
458  diffPad.cd()
459  histo.SetMarkerSize(1)
460  histo.SetMarkerStyle(20)
461  histo.SetMarkerColor(color)
462  histo.GetYaxis().SetLabelSize(0.08)
463  histo.GetYaxis().SetTitleOffset(0.6)
464  histo.GetYaxis().SetTitleSize(0.08)
465  histo.GetXaxis().SetLabelSize(0.)
466  histo.GetXaxis().SetTitleSize(0.)
467  if firstD:
468  if options.logDiv:
469  diffPad.SetLogy()
470  histo.Draw('ex0')
471  firstD = False
472  else:
473  histo.Draw('same ex0')
474  diffPad.Update()
475  optimizeRangeSubPad(argv, divHistos)
476 
477  effPad.cd()
478  legend.Draw()
479  if drawStats:
480  statsBox.Draw()
481  canvas.Print(options.out)
482 
483 
484 if __name__ == '__main__':
485  sys.exit(main())
def replace
Definition: linker.py:10
if(dp >Float(M_PI)) dp-
Definition: main.py:1