CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
MultipleCompare.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 import FWCore.ParameterSet.Config as cms
4 import sys
5 import os
6 import math
7 import re
9 from optparse import OptionParser
10 from ROOT import *
11 
12 __author__ = "Mauro Verzetti (mauro.verzetti@cern.ch) and Lucia Perrini (lucia.perrini@cern.ch)"
13 __doc__ = """Script to plot the content of a Validation .root file and compare it to a different file:\n\n
14 Usage: MultipleCompare.py -T testFile -R refFile [options] [search strings that you want to apply '*' is supported as special character]"""
15 
17  sys.argv = argv
18  parser = OptionParser(description=__doc__)
19  parser.add_option('--myhelp',metavar='', action="store_true",help='prints this output message',dest='help',default = False)
20  parser.add_option('--TestFile','-T',metavar='testFile', type=str,help='Sets the test file',dest='test',default = '')
21  parser.add_option('--RefFile','-R',metavar='refFile', type=str,help='Sets the reference file',dest='ref',default = None)
22  parser.add_option('--output','-o',metavar='outputFile', type=str,help='Sets the output file',dest='out',default = 'MultipleCompare.png')
23  parser.add_option('--logScaleY',action="store_true", dest="logScaleY", default=False, help="Sets the log scale in the plot (Y axis)")
24  parser.add_option('--logScaleX',action="store_true", dest="logScaleX", default=False, help="Sets the log scale in the plot (X axis)")
25  parser.add_option('--fakeRate','-f',action="store_true", dest="fakeRate", default=False, help="Sets the fake rate options and put the correct label (implies --logScale)")
26  parser.add_option('--testLabel','-t',metavar='testLabel', type=str,help='Sets the label to put in the plots for test file',dest='testLabel',default = None)
27  parser.add_option('--refLabel','-r',metavar='refLabel', type=str,help='Sets the label to put in the plots for ref file',dest='refLabel',default = None)
28  parser.add_option('--sampleLabel','-s',metavar='sampleLabel', type=str,help='Sets the label to indicate the sample used',dest='sampleLabel',default = None)
29  parser.add_option('--maxLogX',metavar='number', type=float,help='Sets the maximum of the scale in log scale both in the main and in the sub pad (requires --logScale or -f to work)',dest='maxLogX',default = 100)
30  parser.add_option('--minLogX',metavar='number', type=float,help='Sets the minimum of the scale in log scale (requires --logScale or -f to work)',dest='minLogX',default = 0.001)
31  parser.add_option('--minLogY',metavar='number', type=float,help='Sets the minimum of the scale in log scale (requires --logScale or -f to work)',dest='minLogY',default = 0.0001)
32  parser.add_option('--maxLogY',metavar='number', type=float,help='Sets the maximum of the scale in log scale (requires --logScale or -f to work)',dest='maxLogY',default = 3)
33  parser.add_option('--minYR',metavar='number', type=float,help='Sets the minimum of the scale in sub pad',dest='minYR',default = 0)
34  parser.add_option('--maxYR',metavar='number', type=float,help='Sets the maximum of the scale in sub pad',dest='maxYR',default = 1.2)
35 # parser.add_option('--minDivY',metavar='number', type=float,help='Sets the minimum of the scale in the ratio pad',dest='minDivY',default = 0.)
36 # parser.add_option('--maxDivY',metavar='number', type=float,help='Sets the maximum of the scale in the ratio pad',dest='maxDivY',default = 2)
37 # parser.add_option('--minDivX',metavar='number', type=float,help='Sets the minimum of the scale in the ratio pad',dest='minDivX',default = 0.)
38 # parser.add_option('--maxDivX',metavar='number', type=float,help='Sets the maximum of the scale in the ratio pad',dest='maxDivX',default = 2)
39  parser.add_option('--logDiv',action="store_true", dest="logDiv", default=False, help="Sets the log scale in the plot")
40  parser.add_option('--normalize',action="store_true", dest="normalize", default=False, help="plot normalized")
41  parser.add_option('--maxRange',metavar='number',type=float, dest="maxRange", default=1.6, help="Sets the maximum range in linear plots")
42  parser.add_option('--maxXaxis',metavar='number',type=float, dest="maxXaxis", default=800, help="Sets the maximum range on x axis in the main pad")
43  parser.add_option('--minXaxis',metavar='number',type=float,help="Sets the minimum range on x axis in the main pad",dest="minXaxis", default=-3)
44  parser.add_option('--maxYaxis',metavar='number',type=float, dest="maxYaxis", default=2, help="Sets the maximum range on Y axis in the main pad")
45  parser.add_option('--minYaxis',metavar='number',type=float, dest="minYaxis", default=0, help="Sets the minimum range on Y axis in the main pad")
46  parser.add_option('--rebin', dest="rebin", type=int, default=-1, help="Sets the rebinning scale")
47  parser.add_option('--branding','-b',metavar='branding', type=str,help='Define a branding to label the plots (in the top right corner)',dest='branding',default = None)
48  #parser.add_option('--search,-s',metavar='searchStrings', type=str,help='Sets the label to put in the plots for ref file',dest='testLabel',default = None) No idea on how to tell python to use all the strings before a new option, thus moving this from option to argument (but may be empty)
49 
50  (options,toPlot) = parser.parse_args()
51  if options.help:
52  parser.print_help()
53  sys.exit(0)
54  return [options, toPlot]
55 
56 def GetContent(dir):
57  tempList = dir.GetListOfKeys()
58  retList = []
59  for it in range(0,tempList.GetSize()):
60  retList.append(tempList.At(it).ReadObj())
61  return retList
62 
63 def MapDirStructure( directory, dirName, objectList ):
64  dirContent = GetContent(directory)
65  for entry in dirContent:
66  if type(entry) is TDirectory or type(entry) is TDirectoryFile:
67  subdirName = os.path.join(dirName,entry.GetName())
68  MapDirStructure(entry, subdirName,objectList)
69  else:
70  pathname = os.path.join(dirName,entry.GetName())
71  objectList.append(pathname)
72 
73 def Match(required, got):
74  for part in required.split('*'):
75  if got.find(part) == -1:
76  return False
77  return True
78 
79 def Divide(hNum,hDen):
80  ret = hNum.Clone('Division')
81  ret.GetYaxis().SetTitle('Ratio')
82  for binI in range(hNum.GetNbinsX()+1):
83  denVal = hDen.GetBinContent(binI)
84  denErr = hDen.GetBinError(binI)
85  numErr = hNum.GetBinError(binI)
86  numVal = hNum.GetBinContent(binI)
87  if denVal == 0:
88  ret.SetBinContent(binI,0)
89  ret.SetBinError(binI,0)
90  else:
91  ret.SetBinContent(binI,numVal/denVal)
92  if numVal==0:
93  ret.SetBinError(binI,1)
94  else:
95  ret.SetBinError(binI,(numVal/denVal)*math.sqrt(math.pow(numErr/numVal,2) + math.pow(denErr/denVal,2) ) )
96  return ret
97 
99  #automatically derive all plot types in the future?
100  type = ''
101  label = ''
102  prefix = ''
103  #assuming plots name like: tauType_plotType_xAxis or tauType_plotType_selection
104  matches = re.match(r'.*/(.*)_(.*)_(.*)', name)
105  if matches:
106  prefix = matches.group(1)
107  label = matches.group(3)
108  knowntypes = (['pTRatio','SumPt','Size'])
109  for knowntype in knowntypes:
110  if matches.group(2) == knowntype:
111  type = knowntype
112  if not type: #there are plots labelled ..._vs_...
113  type = 'Eff'
114  else:
115  type = 'Eff'
116 
117  prefixParts = prefix.partition('Discrimination')
118  if prefixParts[2] != '':
119  prefix = prefixParts[2]
120  prefixParts = prefix.partition('By')
121  if prefixParts[2] != '':
122  prefix = prefixParts[2]
123 
124  #print 'type is ' + type
125  return [type, label, prefix]
126 
127 def DrawTitle(text):
128  title = TLatex()
129  title.SetNDC()
130  title.SetTextAlign(12)#3*10=right,3*1=top
131  title.SetTextSize(.035)
132  leftMargin = gStyle.GetPadLeftMargin()
133  topMargin = 1 - 0.5*gStyle.GetPadTopMargin()
134  title.DrawLatex(leftMargin, topMargin, text)
135 
136 def DrawBranding(options, label=''):
137  if options.branding != None or label != '':
138  text = TLatex()
139  text.SetNDC();
140  text.SetTextAlign(11)#3*10=right,3*1=top
141  text.SetTextSize(.025)
142  text.SetTextColor(13)
143  if options.out.find(".eps")!=-1:
144  text.SetTextAngle(-91.0)#eps BUG
145  else:
146  text.SetTextAngle(-90.0)
147  rightMargin = 1 - gStyle.GetPadRightMargin()
148  topMargin = 1 - gStyle.GetPadTopMargin()
149  if label!='':
150  label += ': '
151  text.DrawLatex(rightMargin+.01, topMargin+0.025, label+options.branding);
152 
153 
154 def FindParents(histoPath):
155  root = histoPath[:histoPath.find('_')]
156  par = histoPath[histoPath.find('Eff')+3:]
157  validationPlots = validation.proc.efficiencies.plots._Parameterizable__parameterNames
158  found =0
159  num = ''
160  den = ''
161  for efficiency in validationPlots:
162  effpset = getattr(validation.proc.efficiencies.plots,efficiency)
163  effName = effpset.efficiency.value()
164  effNameCut = effName[effName.find('_'):effName.find('#')]
165  if effNameCut in histoPath:
166  if found == 1:
167  print 'More than one pair of parents found for ' + histopath + ':'
168  assert(False)
169  num = root + effpset.numerator.value()[effName.find('_'):].replace('#PAR#',par)
170  den = root + effpset.denominator.value()[effName.find('_'):].replace('#PAR#',par)
171  found += 1
172  return [num,den]
173 
174 def Rebin(tfile, histoPath, rebinVal):
175  parents = FindParents(histoPath)
176  num = tfile.Get(parents[0])
177  if type(num) != TH1F:
178  print 'Looking for ' + num
179  print 'Plot now found! What the hell are you doing? Exiting...'
180  sys.exit()
181  denSingle = tfile.Get(parents[1])
182  if type(denSingle) != TH1F:
183  print 'Looking for '+denSingle
184  print 'Plot now found! What the hell are you doing? Exiting...'
185  sys.exit()
186  num.Rebin(rebinVal)
187  den = denSingle.Rebin(rebinVal,'denClone')
188  retVal = num.Clone(histoPath+'Rebin%s'%rebinVal)
189  #print 'Num : ' + parents[0]
190  #print 'Den : ' +parents[1]
191  #print "NumBins: %s DenBins: %s" % (num.GetNbinsX(), den.GetNbinsX() )
192  retVal.Divide(num,den,1,1,'B')
193  return retVal
194 
195 def findRange(hists, min0=-1, max0=-1):
196  if len(hists) < 1:
197  return
198  #auto ranges if no user value provided
199  min = min0
200  max = max0
201  if min0 == -1 or max0 == -1:
202  for hist in hists:
203  if min0 == -1:
204  #Divide() sets bin to zero if division not possible. Ignore these bins.
205  minTmp = getMinimumIncludingErrors(hist)
206  if minTmp < min or min == -1:
207  min = minTmp
208  if max0 == -1:
209  maxTmp = getMaximumIncludingErrors(hist)
210  if maxTmp > max or max == -1:
211  max = maxTmp
212  return [min, max]
213 
214 def optimizeRangeMainPad(argv, pad, hists, maxLogX_, minX_, maxX_, maxLogY_, minY_, maxY_):
215  pad.Update()
216  if pad.GetLogy():
217  if maxLogY_ > 0:
218  maxLogY = maxLogY_
219  else:
220  maxLogY = -1
221  minY, maxY = findRange(hists, -1, maxLogY)
222  else:
223  minY, maxY = findRange(hists, minY_, maxY_)
224 
225  if pad.GetLogy():
226  if minY == 0:
227  minY = 0.001
228  else:
229  if minY < 0.7:
230  minY = minY #start from zero if possible
231  if maxY <= 1.1 and maxY > 0.7:
232  maxY = 1.2 #prefere fixed range for easy comparison
233  hists[0].SetAxisRange(minY, maxY, "Y")
234 
235  if pad.GetLogx():
236  if maxLogX_ > 0:
237  maxLogX = maxLogX_
238  else:
239  maxLogX = -1
240  minX, maxX = findRange(hists, -1, maxLogX)
241  else:
242  minX, maxX = findRange(hists, minX_, maxX_)
243 
244  if pad.GetLogx():
245  if minX == 0:
246  minX = 0.001
247  else:
248  if minX < 0.7:
249  minX = minX #start from zero if possible
250  if maxX <= 1.1 and maxX > 0.7:
251  maxX = 1.2 #prefere fixed range for easy comparison
252  hists[0].SetAxisRange(minX, maxX, "X")
253 
254 def optimizeRangeSubPad(argv, pad, hists, maxLogX_, minX_, maxX_, minYRatio_, maxYRatio_):
255  pad.Update()
256  if pad.GetLogx():
257  if maxLogX_ > 0:
258  maxLogX = maxLogX_
259  else:
260  maxLogX = -1
261  minX, maxX = findRange(hists, -1, maxLogX)
262  else:
263  minX, maxX = findRange(hists, minX_, maxX_)
264  if pad.GetLogx():
265  if minX == 0:
266  minX = 0.001
267  else:
268  if minX < 0.7:
269  minX = minX #start from zero if possible
270  if maxX <= 1.1 and maxX > 0.7:
271  maxX = 1.2 #prefere fixed range for easy comparison
272  hists[0].SetAxisRange(minX, maxX, "X")
273 
274  min = -1
275  max = -1
276  if minYRatio_ > 0:
277  min = minYRatio_
278  if maxYRatio_ > 0:
279  max = maxYRatio_
280  min, max = findRange(hists, min, max)
281  if max > 2:
282  max = 2 #maximal bound
283  hists[0].SetAxisRange(min, max, "Y")
284 
286 #find maximum considering also the errors
287  distance = 1.
288  max = -1
289  pos = 0
290  for i in range(1, hist.GetNbinsX()):
291  if hist.GetBinContent(i) > max:#ignore errors here
292  max = hist.GetBinContent(i)
293  pos = i
294  return max + distance*hist.GetBinError(pos)
295 
297  #find minimum considering also the errors
298  #ignoring zero bins
299  distance = 1.
300  min = -1
301  pos = 0
302  for i in range(1, hist.GetNbinsX()):
303  if hist.GetBinContent(i)<=0.:#ignore errors here
304  continue
305  if hist.GetBinContent(i) < min or min==-1:
306  min = hist.GetBinContent(i)
307  pos = i
308  if min < 0:
309  min = 0
310  return min - distance*hist.GetBinError(pos)
311 
312 
313 def main(argv=None):
314  if argv is None:
315  argv = sys.argv
316 
317  options, toPlot = LoadCommandlineOptions(argv)
318 
319  gROOT.SetStyle('Plain')
320  gROOT.SetBatch()
321  gStyle.SetPalette(1)
322  gStyle.SetOptStat(0)
323  gStyle.SetPadGridX(True)
324  gStyle.SetPadGridY(True)
325  gStyle.SetOptTitle(0)
326  gStyle.SetPadTopMargin(0.1)
327  gStyle.SetPadBottomMargin(0.1)
328  gStyle.SetPadLeftMargin(0.13)
329  gStyle.SetPadRightMargin(0.07)
330 
331 
332  testFile = TFile(options.test)
333  refFile = None
334  if options.ref != None:
335  refFile = TFile(options.ref)
336 
337  #Takes the position of all plots that were produced
338  plotList = []
339  MapDirStructure( testFile,'',plotList)
340 
341  histoList = []
342  for plot in toPlot:
343  for path in plotList:
344  if Match(plot.lower(),path.lower()):
345  histoList.append(path)
346 
347 # print "options: ",options
348 # print "toPlot: ",toPlot
349  print histoList
350 
351  if len(histoList)<1:
352  print '\tError: Please specify at least one histogram.'
353  if len(toPlot)>0:
354  print 'Check your plot list:', toPlot
355  sys.exit()
356 
357 
358  #WARNING: For now the hist type is assumed to be constant over all histos.
359  histType, label, prefix = DetermineHistType(histoList[0])
360  #decide whether or not to scale to an integral of 1
361  #should usually not be used most plot types. they are already normalized.
362  scaleToIntegral = False
363  if options.normalize:
364  scaleToIntegral = True
365 
366  ylabel = 'Efficiency'
367 
368  if options.fakeRate:
369  ylabel = 'Fake rate'
370 
371  drawStats = False
372  if histType=='pTRatio' and len(histoList)<3:
373  drawStats = True
374 
375  #legend = TLegend(0.50,0.73,0.50+0.37,1)
376  x1 = 0.33
377  x2 = 1-gStyle.GetPadRightMargin()
378  y2 = 1-gStyle.GetPadTopMargin()
379  lineHeight = .055
380  if len(histoList) == 1:
381  lineHeight = .05
382  y1 = y2 - lineHeight*len(histoList)
383  legend = TLegend(x1,y1,x2,y2)
384  legend.SetHeader(label)
385  legend.SetFillColor(0)
386  legend.SetTextSize(0.032)
387  if drawStats:
388  y2 = y1
389  y1 = y2 - .07*len(histoList)
390  statsBox = TPaveText(x1,y1,x2,y2,"NDC")
391  statsBox.SetFillColor(0)
392  statsBox.SetTextAlign(12)#3*10=right,3*1=top
393  statsBox.SetMargin(0.05)
394  statsBox.SetBorderSize(1)
395 
396 
397  canvas = TCanvas('MultiPlot','MultiPlot',validation.standardDrawingStuff.canvasSizeX.value(),832)
398  effPad = TPad('effPad','effPad',0.01,0.35,0.99,0.99)#0,0.25,1.,1.,0,0)
399  effPad.SetBottomMargin(0.0)#0.1)
400  #effPad.SetTopMargin(0.1)
401  #effPad.SetLeftMargin(0.13)
402  #effPad.SetRightMargin(0.07)
403  effPad.Draw()
404  header = ''
405  if options.sampleLabel != None:
406  header += 'Sample: '+options.sampleLabel
407  if options.testLabel != None:
408  header += ' Dots: '+options.testLabel
409  if options.refLabel != None:
410  header += ' Line: '+options.refLabel
411  DrawTitle(header)
412  DrawBranding(options)
413  diffPad = TPad('diffPad','diffPad',0.01,0.01,0.99,0.32)#0.,0.,1,.25,0,0)
414  diffPad.SetTopMargin(0.00);
415  diffPad.SetBottomMargin(0.30);
416  diffPad.Draw()
417  colors = [2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1,2,3,4,6,5,7,28,1]
418  first = True
419  divHistos = []
420  statTemplate = '%s Mean: %.3f RMS: %.3f'
421  testHs = []
422  refHs = []
423  for histoPath,color in zip(histoList,colors):
424  if(options.rebin == -1):
425  testH = testFile.Get(histoPath)
426  else:
427  testH = Rebin(testFile,histoPath,options.rebin)
428  if type(testH) != TH1F:
429  print 'Looking for '+histoPath
430  print 'Test plot now found! What the hell are you doing? Exiting...'
431  sys.exit()
432  testHs.append(testH)
433  xAx = histoPath[histoPath.find('Eff')+len('Eff'):]
434  effPad.cd()
435  if not testH.GetXaxis().GetTitle(): #only overwrite label if none already existing
436  if hasattr(validation.standardDrawingStuff.xAxes,xAx):
437  testH.GetXaxis().SetTitle( getattr(validation.standardDrawingStuff.xAxes,xAx).xAxisTitle.value())
438  if not testH.GetYaxis().GetTitle(): #only overwrite label if none already existing
439  testH.GetYaxis().SetTitle(ylabel)
440  if label!='':
441  testH.GetXaxis().SetTitle(label+': '+testH.GetXaxis().GetTitle())
442  testH.GetXaxis().SetTitleOffset(1.1)
443  testH.GetXaxis().SetRangeUser(options.minXaxis,options.maxXaxis)
444  testH.GetYaxis().SetTitleOffset(1.1)
445  #testH.GetYaxis().SetTitleSize(0.08)
446  #testH.GetYaxis().CenterTitle()
447  testH.SetMarkerSize(1)
448  testH.SetMarkerStyle(20)
449  testH.SetMarkerColor(color)
450  if histType == 'Eff':
451  legend.AddEntry(testH,histoPath[histoPath.rfind('/')+1:histoPath.find(histType)],'p')
452  else:
453  legend.AddEntry(testH,DetermineHistType(histoPath)[2],'p')
454  if drawStats:
455  text = statsBox.AddText(statTemplate % ('Dots',testH.GetMean(), testH.GetRMS()) )
456  text.SetTextColor(color)
457  if first:
458  first = False
459  if options.logScaleY:
460  effPad.SetLogy()
461  if options.logScaleX:
462  effPad.SetLogx()
463  diffPad.SetLogx()
464  if scaleToIntegral:
465  if testH.GetEntries() > 0:
466  if not testH.GetSumw2N():
467  testH.Sumw2()
468  testH.DrawNormalized('ex0 P')
469  else:
470  print "--> Warning! You tried to normalize a histogram which seems to be already scaled properly. Draw it unscaled."
471  scaleToIntegral = False
472  testH.Draw('ex0')
473  else:
474  testH.Draw('ex0')
475  else:
476  if scaleToIntegral:
477  if testH.GetEntries() > 0:
478  testH.DrawNormalized('same p')
479  else:
480  testH.Draw('same ex0 l')
481  if refFile == None:
482  continue
483  if(options.rebin == -1):
484  refH = refFile.Get(histoPath)
485  else:
486  refH = Rebin(refFile,histoPath,options.rebin)
487  if type(refH) != TH1F:
488  continue
489  refHs.append(refH)
490  refH.SetLineColor(color)
491  refH.SetLineWidth(1)
492  if scaleToIntegral:
493  if testH.GetEntries() > 0:
494  refH.DrawNormalized('same hist')
495  else:
496  refH.DrawCopy('same hist')
497  if drawStats:
498  text = statsBox.AddText(statTemplate % ('Line',refH.GetMean(), refH.GetRMS()) )
499  text.SetTextColor(color)
500  #refH.SetFillColor(color)
501  #refH.SetFillStyle(3001)
502  if scaleToIntegral:
503  entries = testH.GetEntries()
504  if entries > 0:
505  testH.Scale(1./entries)
506  entries = refH.GetEntries()
507  refH.Sumw2()
508  if entries > 0:
509  refH.Scale(1./entries)
510  refH.Draw('same hist')
511  divHistos.append(Divide(testH,refH))
512 
513  if options.maxLogY > 0:
514  maxlY=options.maxLogY
515  if options.maxLogX > 0:
516  maxlX=options.maxLogX
517 
518  tmpHists = []
519  tmpHists.extend(testHs)
520  tmpHists.extend(refHs)
521  optimizeRangeMainPad(argv, effPad, tmpHists, maxlX, options.minXaxis, options.maxXaxis, maxlY, options.minYaxis, options.maxYaxis)
522 
523  firstD = True
524  if refFile != None:
525  for histo,color in zip(divHistos,colors):
526  diffPad.cd()
527  histo.SetMarkerSize(1)
528  histo.SetMarkerStyle(20)
529  histo.SetMarkerColor(color)
530  histo.GetYaxis().SetLabelSize(0.07)
531  histo.GetYaxis().SetTitleOffset(0.75)
532  histo.GetYaxis().SetTitleSize(0.08)
533  histo.GetXaxis().SetLabelSize(0.08)
534  histo.GetXaxis().SetTitleSize(0.08)
535  #histo.GetYaxis().CenterTitle()
536 
537 
538  if firstD:
539  histo.Draw('ex0')
540  firstD = False
541  else:
542  histo.Draw('same ex0')
543  diffPad.Update()
544 
545  if options.maxLogX > 0:
546  maxlX=options.maxLogX
547  optimizeRangeSubPad(argv, diffPad, divHistos, maxlX, options.minXaxis, options.maxXaxis, options.minYR, options.maxYR)
548 
549  effPad.cd()
550  legend.Draw()
551 
552  if drawStats:
553  statsBox.Draw()
554 
555  canvas.Print(options.out)
556 
557 
558 if __name__ == '__main__':
559  sys.exit(main())
assert(m_qm.get())
if(c.getParameter< edm::InputTag >("puppiValueMap").label().size()!=0)
Definition: main.py:1