00001
00005 #include "PerfTools/EdmEvent/interface/EdmEventSize.h"
00006 #include <valarray>
00007 #include <functional>
00008 #include <algorithm>
00009 #include <boost/bind.hpp>
00010 #include <ostream>
00011 #include <limits>
00012 #include <assert.h>
00013
00014 #include "Rtypes.h"
00015 #include "TROOT.h"
00016 #include "TFile.h"
00017 #include "TTree.h"
00018 #include "TStyle.h"
00019 #include "TObjArray.h"
00020 #include "TBranch.h"
00021 #include "TH1.h"
00022 #include "TCanvas.h"
00023 #include "Riostream.h"
00024
00025
00026 #include "TBufferFile.h"
00027
00028 namespace {
00029
00030 enum Indices {kUncompressed,kCompressed};
00031
00032 typedef std::valarray<Long64_t> size_type;
00033
00034 size_type getBasketSize( TBranch *);
00035
00036 size_type getBasketSize( TObjArray * branches) {
00037 size_type result(static_cast<Long64_t>(0),2);
00038 size_t n = branches->GetEntries();
00039 for( size_t i = 0; i < n; ++ i ) {
00040 TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
00041 assert( b != 0 );
00042 result += getBasketSize(b);
00043 }
00044 return result;
00045 }
00046
00047 size_type getBasketSize( TBranch * b) {
00048 size_type result(static_cast<Long64_t>(0),2);
00049 if ( b != 0 ) {
00050 if ( b->GetZipBytes() > 0 ) {
00051 result[kUncompressed] = b->GetTotBytes(); result[kCompressed] = b->GetZipBytes();
00052 } else {
00053 result[kUncompressed] = b->GetTotalSize(); result[kCompressed] = b->GetTotalSize();
00054 }
00055 result += getBasketSize( b->GetListOfBranches() );
00056 }
00057 return result;
00058 }
00059
00060
00061 size_type getTotalSize( TBranch * br) {
00062 TBufferFile buf( TBuffer::kWrite, 10000 );
00063 TBranch::Class()->WriteBuffer( buf, br );
00064 size_type size = getBasketSize(br);
00065 if ( br->GetZipBytes() > 0 )
00066 size[kUncompressed] += buf.Length();
00067 return size;
00068 }
00069 }
00070
00071 namespace perftools {
00072
00073 EdmEventSize::EdmEventSize() :
00074 m_nEvents(0) {}
00075
00076 EdmEventSize::EdmEventSize(std::string const & fileName, std::string const & treeName ) :
00077 m_nEvents(0) {
00078 parseFile(fileName);
00079 }
00080
00081 void EdmEventSize::parseFile(std::string const & fileName, std::string const & treeName) {
00082 m_fileName = fileName;
00083 m_branches.clear();
00084
00085 TFile * file = TFile::Open( fileName.c_str() );
00086 if( file==0 || ( !(*file).IsOpen() ) )
00087 throw Error( "unable to open data file " + fileName, 7002);
00088
00089 TObject * o = file->Get(treeName.c_str() );
00090 if ( o == 0 )
00091 throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
00092
00093 TTree * events = dynamic_cast<TTree*> (o);
00094 if ( events == 0 )
00095 throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
00096
00097 m_nEvents = events->GetEntries();
00098 if ( m_nEvents == 0 )
00099 throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
00100
00101
00102 TObjArray * branches = events->GetListOfBranches();
00103 if ( branches == 0 )
00104 throw Error("tree \"" + treeName+ "\" in file " + fileName + " contains no branches", 7006);
00105
00106 const size_t n = branches->GetEntries();
00107 m_branches.reserve(n);
00108 for( size_t i = 0; i < n; ++i ) {
00109 TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
00110 if (b==0) continue;
00111 std::string const name( b->GetName() );
00112 if ( name == "EventAux" ) continue;
00113 size_type s = getTotalSize(b);
00114 m_branches.push_back( BranchRecord(name, double(s[kCompressed])/double(m_nEvents), double(s[kUncompressed])/double(m_nEvents)) );
00115 }
00116 std::sort(m_branches.begin(),m_branches.end(),
00117 boost::bind(std::greater<double>(),
00118 boost::bind(&BranchRecord::compr_size,_1),
00119 boost::bind(&BranchRecord::compr_size,_2))
00120 );
00121
00122 }
00123
00124 void EdmEventSize::sortAlpha() {
00125 std::sort(m_branches.begin(),m_branches.end(),
00126 boost::bind(std::less<std::string>(),
00127 boost::bind(&BranchRecord::name,_1),
00128 boost::bind(&BranchRecord::name,_2))
00129 );
00130
00131 }
00132
00133 namespace detail {
00134
00135 void shorterName(EdmEventSize::BranchRecord & br) {
00136 size_t b = br.fullName.find('_');
00137 size_t e = br.fullName.rfind('_');
00138 if (b==e) br.name=br.fullName;
00139 else {
00140
00141 br.name = br.fullName.substr(b+1,e-b-1);
00142
00143 e = br.name.rfind('_');
00144 if (e!=std::string::npos) br.name.replace(e,1,":");
00145
00146 br.name.append(" ("+br.fullName.substr(0,b)+")");
00147 }
00148 }
00149
00150 }
00151
00152 void EdmEventSize::formatNames() {
00153 std::for_each(m_branches.begin(),m_branches.end(),
00154 &detail::shorterName);
00155 }
00156
00157
00158
00159 namespace detail {
00160
00161 void dump(ostream& co, EdmEventSize::BranchRecord const & br) {
00162 co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
00163 }
00164 }
00165
00166
00167 void EdmEventSize::dump(std::ostream & co, bool header) const {
00168 if (header) {
00169 co << "File " << m_fileName << " Events " << m_nEvents << "\n";
00170 co <<"Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
00171 }
00172 std::for_each(m_branches.begin(),m_branches.end(),
00173 boost::bind(detail::dump,boost::ref(co),_1));
00174 }
00175
00176 namespace detail {
00177
00178 struct Hist {
00179
00180 explicit Hist(int itop) :
00181 top(itop),
00182 uncompressed( "uncompressed", "branch sizes", top, -0.5, - 0.5 + top ),
00183 compressed( "compressed", "branch sizes", top, -0.5, - 0.5 + top ),
00184 cxAxis(compressed.GetXaxis()),
00185 uxAxis(uncompressed.GetXaxis()),
00186 x(0) {}
00187
00188 void fill(EdmEventSize::BranchRecord const & br) {
00189 if ( x < top ) {
00190 cxAxis->SetBinLabel( x + 1, br.name.c_str() );
00191 uxAxis->SetBinLabel( x + 1, br.name.c_str() );
00192 compressed.Fill( x, br.compr_size );
00193 uncompressed.Fill( x, br.uncompr_size );
00194 x++;
00195 }
00196 }
00197
00198 void finalize() {
00199 double mn = std::numeric_limits<double>::max();
00200 for( int i = 1; i <= top; ++i ) {
00201 double cm = compressed.GetMinimum( i ), um = uncompressed.GetMinimum( i );
00202 if ( cm > 0 && cm < mn ) mn = cm;
00203 if ( um > 0 && um < mn ) mn = um;
00204 }
00205 mn *= 0.8;
00206 double mx = max( compressed.GetMaximum(), uncompressed.GetMaximum() );
00207 mx *= 1.2;
00208 uncompressed.SetMinimum( mn );
00209 uncompressed.SetMaximum( mx );
00210 compressed.SetMinimum( mn );
00211
00212 cxAxis->SetLabelOffset( -0.32 );
00213 cxAxis->LabelsOption( "v" );
00214 cxAxis->SetLabelSize( 0.03 );
00215 uxAxis->SetLabelOffset( -0.32 );
00216 uxAxis->LabelsOption( "v" );
00217 uxAxis->SetLabelSize( 0.03 );
00218 compressed.GetYaxis()->SetTitle( "Bytes" );
00219 compressed.SetFillColor( kBlue );
00220 compressed.SetLineWidth( 2 );
00221 uncompressed.GetYaxis()->SetTitle( "Bytes" );
00222 uncompressed.SetFillColor( kRed );
00223 uncompressed.SetLineWidth( 2 );
00224
00225 }
00226
00227 int top;
00228 TH1F uncompressed;
00229 TH1F compressed;
00230 TAxis * cxAxis;
00231 TAxis * uxAxis;
00232
00233 int x;
00234 };
00235
00236 }
00237
00238 void EdmEventSize::produceHistos(std::string const & plot, std::string const & file, int top) const {
00239 if (top==0) top = m_branches.size();
00240 detail::Hist h(top);
00241 std::for_each(m_branches.begin(),m_branches.end(),
00242 boost::bind(&detail::Hist::fill,boost::ref(h),_1));
00243 h.finalize();
00244 if( !plot.empty() ) {
00245 gROOT->SetStyle( "Plain" );
00246 gStyle->SetOptStat( kFALSE );
00247 gStyle->SetOptLogy();
00248 TCanvas c;
00249 h.uncompressed.Draw();
00250 h.compressed.Draw( "same" );
00251 c.SaveAs( plot.c_str() );
00252 }
00253 if ( !file.empty() ) {
00254 TFile f( file.c_str(), "RECREATE" );
00255 h.compressed.Write();
00256 h.uncompressed.Write();
00257 f.Close();
00258 }
00259
00260 }
00261
00262 }