CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
EdmEventSize.cc
Go to the documentation of this file.
1 
6 #include <valarray>
7 #include <functional>
8 #include <algorithm>
9 #include <boost/bind.hpp>
10 #include <ostream>
11 #include <limits>
12 #include <assert.h>
13 
14 #include "Rtypes.h"
15 #include "TROOT.h"
16 #include "TFile.h"
17 #include "TTree.h"
18 #include "TStyle.h"
19 #include "TObjArray.h"
20 #include "TBranch.h"
21 #include "TH1.h"
22 #include "TCanvas.h"
23 #include "Riostream.h"
24 // #include "FWCore/FWLite/src/AutoLibraryLoader.h"
25 
26 #include "TBufferFile.h"
27 
28 namespace {
29 
// Positions of the two byte counts stored in a size_type value.
30  enum Indices {kUncompressed,kCompressed};
31 
// Array of byte counts; every instance in this file is constructed with two
// elements and indexed with the Indices enumerators above.
32  typedef std::valarray<Long64_t> size_type;
33 
// Forward declaration: the TObjArray overload below calls the TBranch overload,
// which in turn calls the TObjArray overload for its sub-branches.
34  size_type getBasketSize( TBranch *);
35 
36  size_type getBasketSize( TObjArray * branches) {
37  size_type result(static_cast<Long64_t>(0),2);
38  size_t n = branches->GetEntries();
39  for( size_t i = 0; i < n; ++ i ) {
40  TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
41  assert( b != 0 );
42  result += getBasketSize(b);
43  }
44  return result;
45  }
46 
47  size_type getBasketSize( TBranch * b) {
48  size_type result(static_cast<Long64_t>(0),2);
49  if ( b != 0 ) {
50  if ( b->GetZipBytes() > 0 ) {
51  result[kUncompressed] = b->GetTotBytes(); result[kCompressed] = b->GetZipBytes();
52  } else {
53  result[kUncompressed] = b->GetTotalSize(); result[kCompressed] = b->GetTotalSize();
54  }
55  result += getBasketSize( b->GetListOfBranches() );
56  }
57  return result;
58  }
59 
60 
61  size_type getTotalSize( TBranch * br) {
62  TBufferFile buf( TBuffer::kWrite, 10000 );
63  TBranch::Class()->WriteBuffer( buf, br );
64  size_type size = getBasketSize(br);
65  if ( br->GetZipBytes() > 0 )
66  size[kUncompressed] += buf.Length();
67  return size;
68  }
69 }
70 
71 namespace perftools {
72 
74  m_nEvents(0) {}
75 
77  m_nEvents(0) {
78  parseFile(fileName);
79  }
80 
// Open `fileName`, look up the object named `treeName`, and rebuild m_branches
// with one BranchRecord per top-level branch, sorted by decreasing compr_size.
//
// Throws Error with code:
//   7002 - the file could not be opened
//   7003 - no object called `treeName` exists in the file
//   7004 - that object is not a TTree
//   7005 - the tree has zero entries
//   7006 - the tree returned a null branch list
//
// NOTE(review): this listing jumps from line 81 to line 83, so one statement of
// the original body is not visible here; do not treat the text below as complete.
// NOTE(review): the TFile returned by TFile::Open is neither closed nor deleted
// anywhere in the visible body, including on the throw paths - confirm whether
// that is intentional.
81  void EdmEventSize::parseFile(std::string const & fileName, std::string const & treeName) {
83  m_branches.clear();
84 
85  TFile * file = TFile::Open( fileName.c_str() );
86  if( file==0 || ( !(*file).IsOpen() ) )
87  throw Error( "unable to open data file " + fileName, 7002);
88 
89  TObject * o = file->Get(treeName.c_str() );
90  if ( o == 0 )
91  throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
92 
93  TTree * events = dynamic_cast<TTree*> (o);
94  if ( events == 0 )
95  throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
96 
// The entry count is the divisor for the per-event averages below, so an
// empty tree is rejected before any division happens.
97  m_nEvents = events->GetEntries();
98  if ( m_nEvents == 0 )
99  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
100 
101 
102  TObjArray * branches = events->GetListOfBranches();
103  if ( branches == 0 )
104  throw Error("tree \"" + treeName+ "\" in file " + fileName + " contains no branches", 7006);
105 
106  const size_t n = branches->GetEntries();
107  m_branches.reserve(n);
108  for( size_t i = 0; i < n; ++i ) {
109  TBranch * b = dynamic_cast<TBranch*>( branches->At( i ) );
// Entries that are not TBranch objects are silently skipped.
110  if (b==0) continue;
111  std::string const name( b->GetName() );
// The branch named "EventAux" is excluded from the report.
112  if ( name == "EventAux" ) continue;
113  size_type s = getTotalSize(b);
// Arguments are (name, compressed bytes per event, uncompressed bytes per event).
114  m_branches.push_back( BranchRecord(name, double(s[kCompressed])/double(m_nEvents), double(s[kUncompressed])/double(m_nEvents)) );
115  }
// std::greater on compr_size: largest compressed size first.
116  std::sort(m_branches.begin(),m_branches.end(),
117  boost::bind(std::greater<double>(),
118  boost::bind(&BranchRecord::compr_size,_1),
119  boost::bind(&BranchRecord::compr_size,_2))
120  );
121 
122  }
123 
125  std::sort(m_branches.begin(),m_branches.end(),
126  boost::bind(std::less<std::string>(),
127  boost::bind(&BranchRecord::name,_1),
128  boost::bind(&BranchRecord::name,_2))
129  );
130 
131  }
132 
133  namespace detail {
134  // format as product:label (type)
136  size_t b = br.fullName.find('_');
137  size_t e = br.fullName.rfind('_');
138  if (b==e) br.name=br.fullName;
139  else {
140  // remove type and process
141  br.name = br.fullName.substr(b+1,e-b-1);
142  // change label separator in :
143  e = br.name.rfind('_');
144  if (e!=std::string::npos) br.name.replace(e,1,":");
145  // add the type name
146  br.name.append(" ("+br.fullName.substr(0,b)+")");
147  }
148  }
149 
150  }
151 
153  std::for_each(m_branches.begin(),m_branches.end(),
155  }
156 
157 
158 
159  namespace detail {
160 
161  void dump(std::ostream& co, EdmEventSize::BranchRecord const & br) {
162  co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
163  }
164  }
165 
166 
167  void EdmEventSize::dump(std::ostream & co, bool header) const {
168  if (header) {
169  co << "File " << m_fileName << " Events " << m_nEvents << "\n";
170  co <<"Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
171  }
172  std::for_each(m_branches.begin(),m_branches.end(),
173  boost::bind(detail::dump,boost::ref(co),_1));
174  }
175 
176  namespace detail {
177 
// Helper that fills a pair of histograms, `uncompressed` and `compressed`,
// with one bin per branch record.
// NOTE(review): this listing jumps from line 227 to line 230, so the
// declarations of the `uncompressed` and `compressed` members are not visible
// here; they are presumably ROOT TH1-derived histograms - confirm against the
// real source.
178  struct Hist {
179 
// Set up both histograms with `itop` bins spanning [-0.5, itop - 0.5] and
// cache pointers to their x axes. `x` counts the records filled so far.
180  explicit Hist(int itop) :
181  top(itop),
182  uncompressed( "uncompressed", "branch sizes", top, -0.5, - 0.5 + top ),
183  compressed( "compressed", "branch sizes", top, -0.5, - 0.5 + top ),
184  cxAxis(compressed.GetXaxis()),
185  uxAxis(uncompressed.GetXaxis()),
186  x(0) {}
187 
// Add one record: label bin x + 1 on both axes with the branch name and fill
// position x with the record's sizes. Calls made once `top` records have been
// stored do nothing.
188  void fill(EdmEventSize::BranchRecord const & br) {
189  if ( x < top ) {
190  cxAxis->SetBinLabel( x + 1, br.name.c_str() );
191  uxAxis->SetBinLabel( x + 1, br.name.c_str() );
192  compressed.Fill( x, br.compr_size );
193  uncompressed.Fill( x, br.uncompr_size );
194  x++;
195  }
196  }
197 
// Choose the y range and apply axis, label and colour settings to both
// histograms.
198  void finalize() {
// mn starts at the largest double and is lowered to the smallest positive
// value returned by GetMinimum(i) on either histogram for i = 1..top. If no
// positive value is returned it stays at that initial value.
// NOTE(review): if these are ROOT TH1 objects, the argument of GetMinimum is
// a value threshold rather than a bin number - confirm this loop does what
// was intended.
199  double mn = std::numeric_limits<double>::max();
200  for( int i = 1; i <= top; ++i ) {
201  double cm = compressed.GetMinimum( i ), um = uncompressed.GetMinimum( i );
202  if ( cm > 0 && cm < mn ) mn = cm;
203  if ( um > 0 && um < mn ) mn = um;
204  }
// Leave a margin below the smallest and above the largest value.
205  mn *= 0.8;
206  double mx = std::max( compressed.GetMaximum(), uncompressed.GetMaximum() );
207  mx *= 1.2;
208  uncompressed.SetMinimum( mn );
209  uncompressed.SetMaximum( mx );
210  compressed.SetMinimum( mn );
// Only `uncompressed` receives the explicit maximum; the matching call for
// `compressed` is disabled.
211  // compressed.SetMaximum( mx );
212  cxAxis->SetLabelOffset( -0.32 );
213  cxAxis->LabelsOption( "v" );
214  cxAxis->SetLabelSize( 0.03 );
215  uxAxis->SetLabelOffset( -0.32 );
216  uxAxis->LabelsOption( "v" );
217  uxAxis->SetLabelSize( 0.03 );
218  compressed.GetYaxis()->SetTitle( "Bytes" );
219  compressed.SetFillColor( kBlue );
220  compressed.SetLineWidth( 2 );
221  uncompressed.GetYaxis()->SetTitle( "Bytes" );
222  uncompressed.SetFillColor( kRed );
223  uncompressed.SetLineWidth( 2 );
224 
225  }
226 
// Number of bins, i.e. the maximum number of records that fill() accepts.
227  int top;
// x axes of `compressed` and `uncompressed`, obtained in the constructor.
230  TAxis * cxAxis;
231  TAxis * uxAxis;
232 
// Number of records stored so far; also the fill position of the next one.
233  int x;
234  };
235 
236  }
237 
238  void EdmEventSize::produceHistos(std::string const & plot, std::string const & file, int top) const {
239  if (top==0) top = m_branches.size();
240  detail::Hist h(top);
241  std::for_each(m_branches.begin(),m_branches.end(),
242  boost::bind(&detail::Hist::fill,boost::ref(h),_1));
243  h.finalize();
244  if( !plot.empty() ) {
245  gROOT->SetStyle( "Plain" );
246  gStyle->SetOptStat( kFALSE );
247  gStyle->SetOptLogy();
248  TCanvas c;
249  h.uncompressed.Draw();
250  h.compressed.Draw( "same" );
251  c.SaveAs( plot.c_str() );
252  }
253  if ( !file.empty() ) {
254  TFile f( file.c_str(), "RECREATE" );
255  h.compressed.Write();
256  h.uncompressed.Write();
257  f.Close();
258  }
259 
260  }
261 
262 }
int i
Definition: DBlmapReader.cc:9
void dump(std::ostream &co, EdmEventSize::BranchRecord const &br)
void fill(EdmEventSize::BranchRecord const &br)
void dump(std::ostream &co, bool header=true) const
dump the ASCII table on "co"
void parseFile(std::string const &fileName, std::string const &treeName="Events")
read file, compute branch size, sort by size
Definition: EdmEventSize.cc:81
uint16_t size_type
Indices
Definition: EdmEventSize.cc:30
EdmEventSize()
Constructor.
Definition: EdmEventSize.cc:73
tuple result
Definition: query.py:137
void shorterName(EdmEventSize::BranchRecord &br)
the information for each branch
Definition: EdmEventSize.h:36
double f[11][100]
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
string top
Definition: fff_deleter.py:272
double b
Definition: hdecay.h:120
tuple events
Definition: patZpeak.py:19
void formatNames()
transform branch names into "formatted" product identifiers
void sortAlpha()
sort by name
tuple size
Write out results.
void produceHistos(std::string const &plot, std::string const &file, int top=0) const
produce histograms and optionally write them in "file" or as "plot"