CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/PerfTools/EdmEvent/src/EdmEventSize.cc

Go to the documentation of this file.
00001 
00005 #include "PerfTools/EdmEvent/interface/EdmEventSize.h"
00006 #include <valarray>
00007 #include <functional>
00008 #include <algorithm>
00009 #include <boost/bind.hpp>
00010 #include <ostream>
00011 #include <limits>
00012 #include <assert.h>
00013 
00014 #include "Rtypes.h"
00015 #include "TROOT.h"
00016 #include "TFile.h"
00017 #include "TTree.h"
00018 #include "TStyle.h"
00019 #include "TObjArray.h"
00020 #include "TBranch.h"
00021 #include "TH1.h"
00022 #include "TCanvas.h"
00023 #include "Riostream.h"
00024 // #include "FWCore/FWLite/src/AutoLibraryLoader.h"
00025 
00026 #include "TBufferFile.h"
00027 
00028 namespace {
00029 
00030   enum Indices {kUncompressed,kCompressed};
00031 
00032   typedef std::valarray<Long64_t> size_type; 
00033 
00034   size_type getBasketSize( TBranch *);
00035   
00036   size_type getBasketSize( TObjArray * branches) {
00037     size_type result(static_cast<Long64_t>(0),2);
00038     size_t n = branches->GetEntries();
00039     for( size_t i = 0; i < n; ++ i ) {
00040       TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
00041       assert( b != 0 );
00042       result += getBasketSize(b);
00043     }
00044     return result;
00045   }
00046   
00047   size_type getBasketSize( TBranch * b) {
00048     size_type result(static_cast<Long64_t>(0),2);
00049     if ( b != 0 ) {
00050       if ( b->GetZipBytes() > 0 ) {
00051         result[kUncompressed]  = b->GetTotBytes();  result[kCompressed] = b->GetZipBytes();
00052       } else {
00053         result[kUncompressed] = b->GetTotalSize(); result[kCompressed] = b->GetTotalSize();
00054       }
00055       result += getBasketSize( b->GetListOfBranches() );
00056     }
00057     return result;
00058   }
00059 
00060 
00061   size_type getTotalSize( TBranch * br) {
00062     TBufferFile buf( TBuffer::kWrite, 10000 );
00063     TBranch::Class()->WriteBuffer( buf, br );
00064     size_type size = getBasketSize(br);
00065     if ( br->GetZipBytes() > 0 )
00066       size[kUncompressed] += buf.Length();
00067     return size;
00068   }
00069 }
00070 
00071 namespace perftools {
00072 
00073   EdmEventSize::EdmEventSize() : 
00074     m_nEvents(0) {}
00075   
00076   EdmEventSize::EdmEventSize(std::string const & fileName, std::string const & treeName ) : 
00077     m_nEvents(0) {
00078     parseFile(fileName);
00079   }
00080   
00081   void EdmEventSize::parseFile(std::string const & fileName, std::string const & treeName) {
00082     m_fileName = fileName;
00083     m_branches.clear();
00084 
00085     TFile * file = TFile::Open( fileName.c_str() );
00086     if( file==0  || ( !(*file).IsOpen() ) )
00087       throw Error( "unable to open data file " + fileName, 7002);
00088     
00089     TObject * o = file->Get(treeName.c_str() );
00090     if ( o == 0 )
00091       throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
00092     
00093     TTree * events = dynamic_cast<TTree*> (o);
00094     if ( events == 0 )
00095       throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
00096     
00097     m_nEvents = events->GetEntries();
00098     if ( m_nEvents == 0 )
00099       throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
00100 
00101 
00102     TObjArray * branches = events->GetListOfBranches();
00103     if ( branches == 0 )
00104       throw Error("tree \"" + treeName+ "\" in file " + fileName + " contains no branches", 7006);
00105     
00106     const size_t n =  branches->GetEntries();
00107     m_branches.reserve(n);
00108     for( size_t i = 0; i < n; ++i ) {
00109       TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
00110       if (b==0) continue;
00111       std::string const name( b->GetName() );
00112       if ( name == "EventAux" ) continue;
00113       size_type s = getTotalSize(b);
00114       m_branches.push_back( BranchRecord(name, double(s[kCompressed])/double(m_nEvents), double(s[kUncompressed])/double(m_nEvents)) );
00115     }
00116     std::sort(m_branches.begin(),m_branches.end(), 
00117               boost::bind(std::greater<double>(),
00118                           boost::bind(&BranchRecord::compr_size,_1),
00119                           boost::bind(&BranchRecord::compr_size,_2))
00120               );
00121 
00122   }
00123   
00124   void EdmEventSize::sortAlpha() {
00125     std::sort(m_branches.begin(),m_branches.end(), 
00126               boost::bind(std::less<std::string>(),
00127                           boost::bind(&BranchRecord::name,_1),
00128                           boost::bind(&BranchRecord::name,_2))
00129               );
00130 
00131   }
00132 
00133   namespace detail {
00134     // format as product:label (type)
00135     void shorterName(EdmEventSize::BranchRecord & br) {
00136       size_t b = br.fullName.find('_');
00137       size_t e = br.fullName.rfind('_');
00138       if (b==e) br.name=br.fullName;
00139       else {
00140         // remove type and process
00141         br.name = br.fullName.substr(b+1,e-b-1);
00142         // change label separator in :
00143         e = br.name.rfind('_');
00144         if (e!=std::string::npos) br.name.replace(e,1,":");
00145         // add the type name
00146         br.name.append(" ("+br.fullName.substr(0,b)+")");
00147       }
00148     }
00149 
00150   }
00151   
00152   void EdmEventSize::formatNames() {
00153     std::for_each(m_branches.begin(),m_branches.end(),
00154                   &detail::shorterName);
00155   }
00156 
00157 
00158 
00159   namespace detail {
00160 
00161     void dump(ostream& co, EdmEventSize::BranchRecord const & br) {
00162       co << br.name << " " <<  br.uncompr_size <<  " " << br.compr_size << "\n"; 
00163     }
00164   }
00165 
00166   
00167   void EdmEventSize::dump(std::ostream & co, bool header) const {
00168     if (header) {
00169       co << "File " << m_fileName << " Events " << m_nEvents << "\n";
00170       co <<"Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
00171     }
00172     std::for_each(m_branches.begin(),m_branches.end(),
00173                   boost::bind(detail::dump,boost::ref(co),_1));
00174   }
00175 
00176   namespace detail {
00177 
00178     struct Hist {
00179 
00180       explicit Hist(int itop) : 
00181         top(itop),
00182         uncompressed( "uncompressed", "branch sizes", top, -0.5, - 0.5 + top ),
00183         compressed( "compressed", "branch sizes", top, -0.5, - 0.5 + top ),
00184         cxAxis(compressed.GetXaxis()),
00185         uxAxis(uncompressed.GetXaxis()),
00186         x(0) {}
00187       
00188       void fill(EdmEventSize::BranchRecord const & br) {
00189         if ( x < top ) {
00190           cxAxis->SetBinLabel( x + 1, br.name.c_str() );
00191           uxAxis->SetBinLabel( x + 1, br.name.c_str() );
00192           compressed.Fill( x, br.compr_size );
00193           uncompressed.Fill( x, br.uncompr_size );
00194           x++;
00195         }
00196       }
00197 
00198       void finalize() {
00199         double mn = std::numeric_limits<double>::max();
00200         for( int i = 1; i <= top; ++i ) {
00201           double cm = compressed.GetMinimum( i ), um = uncompressed.GetMinimum( i );
00202           if ( cm > 0 && cm < mn ) mn = cm;
00203           if ( um > 0 && um < mn ) mn = um;
00204         }
00205         mn *= 0.8;
00206         double mx = max( compressed.GetMaximum(), uncompressed.GetMaximum() );
00207         mx *= 1.2;
00208         uncompressed.SetMinimum( mn );
00209         uncompressed.SetMaximum( mx );
00210         compressed.SetMinimum( mn );
00211         //  compressed.SetMaximum( mx );
00212         cxAxis->SetLabelOffset( -0.32 );
00213         cxAxis->LabelsOption( "v" );
00214         cxAxis->SetLabelSize( 0.03 );
00215         uxAxis->SetLabelOffset( -0.32 );
00216         uxAxis->LabelsOption( "v" );
00217         uxAxis->SetLabelSize( 0.03 );
00218         compressed.GetYaxis()->SetTitle( "Bytes" );
00219         compressed.SetFillColor( kBlue );
00220         compressed.SetLineWidth( 2 );
00221         uncompressed.GetYaxis()->SetTitle( "Bytes" );
00222         uncompressed.SetFillColor( kRed );
00223         uncompressed.SetLineWidth( 2 );
00224         
00225       }
00226       
00227       int top;
00228       TH1F uncompressed;
00229       TH1F compressed;
00230       TAxis * cxAxis;
00231       TAxis * uxAxis;
00232       
00233       int x;
00234     };
00235   
00236   }
00237   
00238   void EdmEventSize::produceHistos(std::string const & plot, std::string const & file, int top) const {
00239     if (top==0) top = m_branches.size();
00240     detail::Hist h(top);
00241     std::for_each(m_branches.begin(),m_branches.end(),
00242                   boost::bind(&detail::Hist::fill,boost::ref(h),_1));
00243     h.finalize();
00244     if( !plot.empty() ) {
00245       gROOT->SetStyle( "Plain" );
00246       gStyle->SetOptStat( kFALSE );
00247       gStyle->SetOptLogy();
00248       TCanvas c;
00249       h.uncompressed.Draw();
00250       h.compressed.Draw( "same" );
00251       c.SaveAs( plot.c_str() );
00252     }
00253     if ( !file.empty() ) {
00254       TFile f( file.c_str(), "RECREATE" );
00255       h.compressed.Write();
00256       h.uncompressed.Write();
00257       f.Close();
00258     }
00259 
00260   }
00261 
00262 }