CMS 3D CMS Logo

EdmEventSize.cc
Go to the documentation of this file.
1 
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <valarray>

#include <boost/bind.hpp>
13 
14 #include "Rtypes.h"
15 #include "TROOT.h"
16 #include "TFile.h"
17 #include "TTree.h"
18 #include "TStyle.h"
19 #include "TObjArray.h"
20 #include "TBranch.h"
21 #include "TH1.h"
22 #include "TCanvas.h"
23 #include "Riostream.h"
24 
25 #include "TBufferFile.h"
26 
27 namespace {
28 
29  enum Indices {kUncompressed,kCompressed};
30 
31  typedef std::valarray<Long64_t> size_type;
32 
33  size_type getBasketSize( TBranch *);
34 
35  size_type getBasketSize( TObjArray * branches) {
36  size_type result(static_cast<Long64_t>(0),2);
37  size_t n = branches->GetEntries();
38  for( size_t i = 0; i < n; ++ i ) {
39  TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
40  assert( b != nullptr );
41  result += getBasketSize(b);
42  }
43  return result;
44  }
45 
46  size_type getBasketSize( TBranch * b) {
47  size_type result(static_cast<Long64_t>(0),2);
48  if ( b != nullptr ) {
49  if ( b->GetZipBytes() > 0 ) {
50  result[kUncompressed] = b->GetTotBytes(); result[kCompressed] = b->GetZipBytes();
51  } else {
52  result[kUncompressed] = b->GetTotalSize(); result[kCompressed] = b->GetTotalSize();
53  }
54  result += getBasketSize( b->GetListOfBranches() );
55  }
56  return result;
57  }
58 
59 
60  size_type getTotalSize( TBranch * br) {
61  TBufferFile buf( TBuffer::kWrite, 10000 );
62  TBranch::Class()->WriteBuffer( buf, br );
63  size_type size = getBasketSize(br);
64  if ( br->GetZipBytes() > 0 )
65  size[kUncompressed] += buf.Length();
66  return size;
67  }
68 }
69 
70 namespace perftools {
71 
73  m_nEvents(0) {}
74 
76  m_nEvents(0) {
77  parseFile(fileName);
78  }
79 
80  void EdmEventSize::parseFile(std::string const & fileName, std::string const & treeName) {
82  m_branches.clear();
83 
84  TFile * file = TFile::Open( fileName.c_str() );
85  if( file==nullptr || ( !(*file).IsOpen() ) )
86  throw Error( "unable to open data file " + fileName, 7002);
87 
88  TObject * o = file->Get(treeName.c_str() );
89  if ( o == nullptr )
90  throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
91 
92  TTree * events = dynamic_cast<TTree*> (o);
93  if ( events == nullptr )
94  throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
95 
96  m_nEvents = events->GetEntries();
97  if ( m_nEvents == 0 )
98  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
99 
100 
101  TObjArray * branches = events->GetListOfBranches();
102  if ( branches == nullptr )
103  throw Error("tree \"" + treeName+ "\" in file " + fileName + " contains no branches", 7006);
104 
105  const size_t n = branches->GetEntries();
106  m_branches.reserve(n);
107  for( size_t i = 0; i < n; ++i ) {
108  TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
109  if (b==nullptr) continue;
110  std::string const name( b->GetName() );
111  if ( name == "EventAux" ) continue;
112  size_type s = getTotalSize(b);
113  m_branches.push_back( BranchRecord(name, double(s[kCompressed])/double(m_nEvents), double(s[kUncompressed])/double(m_nEvents)) );
114  }
115  std::sort(m_branches.begin(),m_branches.end(),
116  boost::bind(std::greater<double>(),
117  boost::bind(&BranchRecord::compr_size,_1),
118  boost::bind(&BranchRecord::compr_size,_2))
119  );
120 
121  }
122 
124  std::sort(m_branches.begin(),m_branches.end(),
125  boost::bind(std::less<std::string>(),
126  boost::bind(&BranchRecord::name,_1),
127  boost::bind(&BranchRecord::name,_2))
128  );
129 
130  }
131 
132  namespace detail {
133  // format as product:label (type)
135  size_t b = br.fullName.find('_');
136  size_t e = br.fullName.rfind('_');
137  if (b==e) br.name=br.fullName;
138  else {
139  // remove type and process
140  br.name = br.fullName.substr(b+1,e-b-1);
141  // change label separator in :
142  e = br.name.rfind('_');
143  if (e!=std::string::npos) br.name.replace(e,1,":");
144  // add the type name
145  br.name.append(" ("+br.fullName.substr(0,b)+")");
146  }
147  }
148 
149  }
150 
152  std::for_each(m_branches.begin(),m_branches.end(),
154  }
155 
156 
157 
158  namespace detail {
159 
160  void dump(std::ostream& co, EdmEventSize::BranchRecord const & br) {
161  co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
162  }
163  }
164 
165 
166  void EdmEventSize::dump(std::ostream & co, bool header) const {
167  if (header) {
168  co << "File " << m_fileName << " Events " << m_nEvents << "\n";
169  co <<"Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
170  }
171  std::for_each(m_branches.begin(),m_branches.end(),
172  boost::bind(detail::dump,boost::ref(co),_1));
173  }
174 
175  namespace detail {
176 
177  struct Hist {
178 
179  explicit Hist(int itop) :
180  top(itop),
181  uncompressed( "uncompressed", "branch sizes", top, -0.5, - 0.5 + top ),
182  compressed( "compressed", "branch sizes", top, -0.5, - 0.5 + top ),
183  cxAxis(compressed.GetXaxis()),
184  uxAxis(uncompressed.GetXaxis()),
185  x(0) {}
186 
188  if ( x < top ) {
189  cxAxis->SetBinLabel( x + 1, br.name.c_str() );
190  uxAxis->SetBinLabel( x + 1, br.name.c_str() );
191  compressed.Fill( x, br.compr_size );
192  uncompressed.Fill( x, br.uncompr_size );
193  x++;
194  }
195  }
196 
197  void finalize() {
198  double mn = std::numeric_limits<double>::max();
199  for( int i = 1; i <= top; ++i ) {
200  double cm = compressed.GetMinimum( i ), um = uncompressed.GetMinimum( i );
201  if ( cm > 0 && cm < mn ) mn = cm;
202  if ( um > 0 && um < mn ) mn = um;
203  }
204  mn *= 0.8;
205  double mx = std::max( compressed.GetMaximum(), uncompressed.GetMaximum() );
206  mx *= 1.2;
207  uncompressed.SetMinimum( mn );
208  uncompressed.SetMaximum( mx );
209  compressed.SetMinimum( mn );
210  // compressed.SetMaximum( mx );
211  cxAxis->SetLabelOffset( -0.32 );
212  cxAxis->LabelsOption( "v" );
213  cxAxis->SetLabelSize( 0.03 );
214  uxAxis->SetLabelOffset( -0.32 );
215  uxAxis->LabelsOption( "v" );
216  uxAxis->SetLabelSize( 0.03 );
217  compressed.GetYaxis()->SetTitle( "Bytes" );
218  compressed.SetFillColor( kBlue );
219  compressed.SetLineWidth( 2 );
220  uncompressed.GetYaxis()->SetTitle( "Bytes" );
221  uncompressed.SetFillColor( kRed );
222  uncompressed.SetLineWidth( 2 );
223 
224  }
225 
226  int top;
229  TAxis * cxAxis;
230  TAxis * uxAxis;
231 
232  int x;
233  };
234 
235  }
236 
237  void EdmEventSize::produceHistos(std::string const & plot, std::string const & file, int top) const {
238  if (top==0) top = m_branches.size();
239  detail::Hist h(top);
240  std::for_each(m_branches.begin(),m_branches.end(),
241  boost::bind(&detail::Hist::fill,boost::ref(h),_1));
242  h.finalize();
243  if( !plot.empty() ) {
244  gROOT->SetStyle( "Plain" );
245  gStyle->SetOptStat( kFALSE );
246  gStyle->SetOptLogy();
247  TCanvas c;
248  h.uncompressed.Draw();
249  h.compressed.Draw( "same" );
250  c.SaveAs( plot.c_str() );
251  }
252  if ( !file.empty() ) {
253  TFile f( file.c_str(), "RECREATE" );
254  h.compressed.Write();
255  h.uncompressed.Write();
256  f.Close();
257  }
258 
259  }
260 
261 }
size
Write out results.
edm::ErrorSummaryEntry Error
void dump(std::ostream &co, EdmEventSize::BranchRecord const &br)
FWCore Framework interface EventSetupRecordImplementation h
Helper function to determine trigger accepts.
void fill(EdmEventSize::BranchRecord const &br)
void dump(std::ostream &co, bool header=true) const
dump the ascii table on "co"
void parseFile(std::string const &fileName, std::string const &treeName="Events")
read file, compute branch size, sort by size
Definition: EdmEventSize.cc:80
uint16_t size_type
Indices
Definition: EdmEventSize.cc:29
EdmEventSize()
Constructor.
Definition: EdmEventSize.cc:72
void shorterName(EdmEventSize::BranchRecord &br)
the information for each branch
Definition: EdmEventSize.h:36
double f[11][100]
double b
Definition: hdecay.h:120
void formatNames()
transform branch names into "formatted" product identifiers
void sortAlpha()
sort by name
void produceHistos(std::string const &plot, std::string const &file, int top=0) const
produce histograms and optionally write them in "file" or as "plot"