CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_3/src/PhysicsTools/PythonAnalysis/python/rootplot/tree2hists.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003 Create ROOT Histograms from one or more ROOT TTrees or TNtuples.
00004 
00005 Options are specified in the given configuration file.
00006 """
00007 
00008 # Create configuration file:
00009 #   tree2hists.py
00010 # Edit, then run with config file:
00011 #   tree2hists.py config.py
00012 
00013 __license__ = '''\
00014 Copyright (c) 2010 Michael Anderson <mbanderson@wisc.edu>
00015 
00016 Permission is hereby granted, free of charge, to any person obtaining a copy
00017 of this software and associated documentation files (the "Software"), to deal
00018 in the Software without restriction, including without limitation the rights
00019 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00020 copies of the Software, and to permit persons to whom the Software is
00021 furnished to do so, subject to the following conditions:
00022 
00023 The above copyright notice and this permission notice shall be included in
00024 all copies or substantial portions of the Software.
00025 
00026 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00027 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00028 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00029 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00030 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00031 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00032 THE SOFTWARE.
00033 '''
00034 
00035 ######## Import python libraries #############################################
00036 
00037 import sys                              # For exiting program
00038 if '-h' in sys.argv or '--help' in sys.argv:
00039     print '''\
00040 Create ROOT Histograms from one or more ROOT TTrees or TNtuples.
00041 
00042 Run by specifying configuration file:
00043   tree2hists config.py
00044 
00045 Create default config file by running with no arguments:
00046   tree2hists'''
00047     sys.exit(0)
00048 try:
00049     from ROOT import TFile, TTree, TH1F, TH2F, TH3F, gROOT
00050 except Exception, e:
00051     print e
00052     print ("Use a python that has PyROOT installed.")
00053     sys.exit(0)
00054 from copy import deepcopy     # For copying histograms
00055 from math import pi           # For use in histogram bounds
00056 from array import array       # For making Float_t array ROOT wants (for hists)
00057 from datetime import datetime # For output filename
00058 from os import path           # For finding file
00059 
00060 ######## Define classes and generators #######################################
00061 
00062 class RootTree:
00063     """Wrapper for TTrees and TNtuples, allowing association with
00064     a scale and cuts."""
00065     def __init__(self, treeName, fileName, scale=1.0, cuts=""):
00066         self.fileName = fileName
00067         self.treeName = treeName
00068         self.scale    = scale
00069         self.cuts     = cuts
00070         self.tfile    = TFile()
00071         self.ttree    = TTree()
00072 
00073 class Plot:
00074     """Wrapper for TH1 objects, associating TTree variables with a histogram"""
00075     def __init__(self, treeVariable, histogram, cuts="", storeErrors=True):
00076         self.treeVariable = treeVariable
00077         self.histogram    = histogram
00078         self.name         = histogram.GetName()
00079         self.cuts         = cuts
00080         if storeErrors: self.histogram.Sumw2()
00081 
00082 def join_cuts(*list_of_cuts):
00083     """Joins list of cuts (strings) into something ROOT can handle.
00084     Example:  given ('1<2','','5>4') returns '1<2&&5>4'"""
00085     list_of_nonempty_cuts = []
00086     for cut in list_of_cuts:
00087         if cut:
00088             list_of_nonempty_cuts.append(cut)
00089     return '&&'.join(list_of_nonempty_cuts)
00090 
00091 def duration_to_string(start, end):
00092     timeTaken = end - start
00093     hours, remainder = divmod(timeTaken.seconds, 3600)
00094     minutes, seconds = divmod(remainder, 60)
00095     if hours>0:
00096         return "%i hours, %i minutes" % (hours, minutes)
00097     elif minutes>0:
00098         return "%i minutes" % minutes
00099     return "%i seconds" % seconds                            
00100 
00101 def write_default_T2H_config():
00102     """Writes configuration file for tree2hists"""
00103     defaultConfig = '''# Configuration file for tree2hists
00104 # Created %s.
00105 try:
00106     ## the normal way to import from rootplot
00107     from rootplot.tree2hists import RootTree, Plot
00108 except ImportError:
00109     ## special import for CMSSW installations of rootplot
00110     from PhysicsTools.PythonAnalysis.rootplot.tree2hists import RootTree, Plot
00111 from array import array      # to allow making Float_t arrays for ROOT hists
00112 from math import pi
00113 from ROOT import TH1F, TH2F  # import other kinds of hists as neeeded
00114 
00115 list_of_files = [RootTree("Treename", fileName="photons.root", scale=1.0, cuts=""),
00116                  RootTree("Treename", fileName="photons2.root", scale=1.0, cuts="")]
00117 
00118 output_filename = "Hists_photons.root"
00119 
00120 cut_for_all_files = "(!TTBit[36] && !TTBit[37] && !TTBit[38] && !TTBit[39] && !vtxIsFake && vtxNdof>4 && abs(vtxZ)<=15)"
00121 
00122 # All plots are made for each "cut set".
00123 # A "cut set" is 3 things: folder name to store hists in, string to add to hist titles, and cuts for these hists.
00124 # Let cut_sets = [] to make all plots.
00125 cut_sets = [
00126     ("barrel15to20", "(|#eta|<1.45, 15<E_{T}<20)", "et>15&&et<20&&abs(eta)<1.45"),
00127     ("barrel20to30", "(|#eta|<1.45, 20<E_{T}<30)", "et>20&&et<30&&abs(eta)<1.45"),
00128     ("endcap15to20", "(1.7<|#eta|<2.5, 15<E_{T}<20)", "et>15&&et<20&&abs(eta)>1.7&&abs(eta)<2.5"),
00129     ("endcap20to30", "(1.7<|#eta|<2.5, 20<E_{T}<30)", "et>20&&et<30&&abs(eta)>1.7&&abs(eta)<2.5")
00130     ]
00131 
00132 # Define histograms to plot
00133 bins_et     = array("f", [15.0, 20.0, 30.0, 50.0, 80.0, 120.0]) # example custom bins
00134 list_of_plots = [
00135     Plot("et"           , TH1F("pho_et"           , "Lead #gamma: E_{T};E_{T} (GeV);entries/bin", 25, 0.0, 100.0)),
00136     Plot("eta"          , TH1F("pho_eta"          , "Lead #gamma: #eta;#eta;entries/bin"        , 25, -3.0, 3.0)),
00137     Plot("et"           , TH1F("pho_et_binned"    , "Lead #gamma: E_{T};E_{T} (GeV);entries/bin", len(bins_et)-1, bins_et)),
00138     Plot("sigmaIetaIeta", TH1F("pho_sigmaIEtaIEta", "Lead #gamma: #sigma_{i#etai#eta};#sigma_{i#etai#eta};entries/bin",20, 0, 0.06)),
00139     Plot("metEt/et"     , TH1F("metEt_over_phoEt" , "MET / E_{T}(#gamma);MET/E_{T}(sc);entries/bin"   , 20, 0.0, 3.0)),
00140     Plot("phi:eta"      , TH2F("phoPhi_vs_phoEta" , "Lead #gamma: #phi vs #eta;#eta;#phi"             , 50, -2.5, 2.5, 18, -pi, pi))
00141     ]
00142 ''' % datetime.now().strftime("%b %d, %Y")
00143     f = open('t2h_config.py', 'w')
00144     f.write(defaultConfig)
00145     f.close()
00146     print "Created default configuration file: t2h_config.py"
00147     print "Edit it, then run by typing:"
00148     print "  tree2hists t2h_config.py"
00149 ##############################################################################
00150 
00151 def make_all_hists_all_files(list_of_RootTrees, list_of_Plots, cut_for_all_files, list_of_cutSets):
00152     '''Open root files one at a time, make plots, then close them.'''
00153     list_of_plots_to_write = []
00154 
00155     # Create plots for each set of cuts
00156     for set_of_cuts in list_of_cutSets:
00157         histname_fix, title_fix, current_cut_set = set_of_cuts
00158         for plot in list_of_Plots:
00159             new_plot  = deepcopy(plot)
00160             new_title = ' '.join((plot.histogram.GetTitle(), title_fix))
00161             new_plot.histogram.SetTitle(new_title)
00162             list_of_plots_to_write.append((new_plot, set_of_cuts))
00163     
00164     for j, rootTree in enumerate(list_of_RootTrees):
00165         rootTree.tfile = TFile(rootTree.fileName, "read")           # Open TFile
00166         if rootTree.tfile.IsZombie():
00167             print "Error opening %s, exiting..." % rootTree.fileName
00168             sys.exit(0)
00169         try:                                      # Try to get TTree from file.
00170             rootTree.tfile.GetObject(rootTree.treeName, rootTree.ttree)
00171         except:
00172             print "Error: %s not found in %s, exiting..." % (rootTree.treeName,
00173                                                              rootTree.fileName)
00174             sys.exit(1)
00175 
00176         print "\n%s: Opened %s  %i MB" % (datetime.now().strftime("%I:%M%p"),
00177                                           rootTree.fileName,
00178                                           path.getsize(rootTree.fileName)/1048576)
00179         print " %s has %i entries, will plot with scale=%.2e, cuts='%s'" % (rootTree.treeName,
00180                                                                             rootTree.ttree.GetEntries(),
00181                                                                             rootTree.scale,
00182                                                                             rootTree.cuts)
00183         
00184         # Loop over plots
00185         print "   # entries                  var >> histogram"
00186         for i, (plot, set_of_cuts) in enumerate(list_of_plots_to_write):
00187             histname_fix, title_fix, current_cut_set = set_of_cuts
00188             tmp_hist = plot.histogram.Clone("temp")    # Create temp hist
00189             all_cuts = join_cuts(cut_for_all_files, rootTree.cuts,
00190                                  current_cut_set, plot.cuts) # Set cuts
00191             rootTree.ttree.Draw( "%s >> temp" % plot.treeVariable, all_cuts,
00192                                  "goff")               # Draw with graphics off
00193             tmp_hist.Scale(rootTree.scale)             # Scale temp
00194             entries_before = plot.histogram.GetEntries()
00195             plot.histogram.Add(tmp_hist)               # Add temp hist to total
00196             entries_after = plot.histogram.GetEntries()
00197             print " %3i %7i %20s >> %s/%s" % (i, entries_after-entries_before,
00198                                               plot.treeVariable, histname_fix,
00199                                               plot.histogram.GetName()),
00200             if plot.cuts:
00201                 print "\textra cuts: %s" % plot.cuts, # plot-specific cuts
00202             print
00203             
00204         rootTree.tfile.Close()                                    # Close TFile
00205         print "%s: Closed %s" % (datetime.now().strftime("%I:%M%p"),
00206                                  rootTree.fileName)
00207     return list_of_plots_to_write
00208 
00209 
00210 ######## Define the main program #############################################
00211 def tree2hists_main(config_file):
00212     try:
00213         # Import only certain variables
00214         sys.path.insert(0, '')
00215         _temp = __import__(config_file, globals(), locals(),
00216                            ['list_of_files','output_filename',
00217                             'cut_for_all_files','cut_sets','list_of_plots'], -1)
00218         list_of_files     = _temp.list_of_files
00219         output_filename   = _temp.output_filename
00220         cut_for_all_files = _temp.cut_for_all_files
00221         cut_sets          = _temp.cut_sets
00222         list_of_plots     = _temp.list_of_plots
00223         for rootTree in list_of_files:
00224             if not path.isfile(rootTree.fileName):
00225                 print "Error:\n  %s\nnot found for input." % rootTree.fileName
00226                 sys.exit(1)
00227         hist_names = [plot.name for plot in list_of_plots]
00228         if len(hist_names)>len(set(hist_names)):
00229             print hist_names
00230             print "Error: Each plot needs a unique name, exiting..."
00231             sys.exit(1)
00232         if path.isfile(output_filename):
00233             print "Warning: %s exists" % output_filename
00234     except Exception, e:
00235         print e
00236         print "Error with %s" % config_file
00237         sys.exit(1)
00238 
00239     if path.isfile('rootlogon.C'):
00240         print "Loading rootlogon.C"
00241         gROOT.Macro('rootlogon.C')    # Load functions from rootlogon script
00242 
00243     if cut_sets:
00244         print "\n%i defined cut sets:" % len(cut_sets)
00245         for cut in cut_sets:
00246             name, title_fix, current_cut_set = cut
00247             print "  %s\t: '%s'" % (name, current_cut_set)
00248         cut_names = [name for name,num,cut in cut_sets]
00249         if len(cut_names)>len(set(cut_names)):
00250             print "Error: Each cut set needs a unique name, exiting..."
00251             sys.exit(1)
00252     else:
00253         cut_sets = [("","","")] # Make all plots, no extra cuts
00254 
00255     print "\nCuts to apply to all files:\n\t'%s'" % cut_for_all_files
00256 
00257     start_time = datetime.now()
00258     list_of_plots_to_write = make_all_hists_all_files(list_of_files,
00259                                                       list_of_plots,
00260                                                       cut_for_all_files,
00261                                                       cut_sets)
00262     end_time = datetime.now()
00263     print "Done drawing all plots after %s." % duration_to_string(start_time, end_time)
00264 
00265     #   Store and save/close files
00266     outputFile = TFile(output_filename, "recreate")
00267     if outputFile.IsZombie():
00268         print "Error opening %s for output exiting..." % output_filename
00269         sys.exit(1)
00270     print "\nOpened output file. Saving histograms..."
00271     outputFile.cd()
00272     for set_of_cuts in cut_sets:
00273         outputFile.mkdir(set_of_cuts[0])
00274     print "   # entries  histogram"
00275     for i, (plot, cutset) in enumerate(list_of_plots_to_write):
00276         outputFile.cd(cutset[0])
00277         print " %3i %7i  %s/%s" % (i, plot.histogram.GetEntries(),
00278                                    cutset[0],
00279                                    plot.histogram.GetName())
00280         plot.histogram.Write()
00281     outputFile.Close()
00282     print "Done saving."
00283     print "\nScaled & added histograms from %i TTrees saved in\n  %s" % (len(list_of_files), output_filename)
00284 ##############################################################################
00285 
00286 def main():
00287     if len(sys.argv) > 1:
00288         if path.isfile(sys.argv[1]):
00289             config_file = sys.argv[1].split('.')[0]
00290             tree2hists_main(config_file)
00291         else:
00292             print "%s not found." % sys.argv[1]
00293             print("Create default config file by running tree2hists "
00294                   "with no argument.")
00295             sys.exit(1)
00296     else:
00297         if path.exists('t2h_config.py'):
00298             print "Run with specific config file, like:"
00299             print "  tree2hists t2h_config.py"
00300             sys.exit(1)
00301         write_default_T2H_config()
00302         sys.exit(0)    
00303 
00304 if __name__ == "__main__":
00305     main()