CMS 3D CMS Logo

EdmEventSize.cc
Go to the documentation of this file.
1 
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <valarray>

#include <boost/bind.hpp>
13 
14 #include "Rtypes.h"
15 #include "TROOT.h"
16 #include "TFile.h"
17 #include "TTree.h"
18 #include "TStyle.h"
19 #include "TObjArray.h"
20 #include "TBranch.h"
21 #include "TH1.h"
22 #include "TCanvas.h"
23 #include "Riostream.h"
24 
25 #include "TBufferFile.h"
26 
27 namespace {
28 
29  enum Indices { kUncompressed, kCompressed };
30 
31  typedef std::valarray<Long64_t> size_type;
32 
33  size_type getBasketSize(TBranch*);
34 
35  size_type getBasketSize(TObjArray* branches) {
36  size_type result(static_cast<Long64_t>(0), 2);
37  size_t n = branches->GetEntries();
38  for (size_t i = 0; i < n; ++i) {
39  TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
40  assert(b != nullptr);
41  result += getBasketSize(b);
42  }
43  return result;
44  }
45 
46  size_type getBasketSize(TBranch* b) {
47  size_type result(static_cast<Long64_t>(0), 2);
48  if (b != nullptr) {
49  if (b->GetZipBytes() > 0) {
50  result[kUncompressed] = b->GetTotBytes();
51  result[kCompressed] = b->GetZipBytes();
52  } else {
53  result[kUncompressed] = b->GetTotalSize();
54  result[kCompressed] = b->GetTotalSize();
55  }
56  result += getBasketSize(b->GetListOfBranches());
57  }
58  return result;
59  }
60 
61  size_type getTotalSize(TBranch* br) {
62  TBufferFile buf(TBuffer::kWrite, 10000);
63  TBranch::Class()->WriteBuffer(buf, br);
64  size_type size = getBasketSize(br);
65  if (br->GetZipBytes() > 0)
66  size[kUncompressed] += buf.Length();
67  return size;
68  }
69 } // namespace
70 
71 namespace perftools {
72 
73  EdmEventSize::EdmEventSize() : m_nEvents(0) {}
74 
76  parseFile(fileName);
77  }
78 
79  void EdmEventSize::parseFile(std::string const& fileName, std::string const& treeName) {
81  m_branches.clear();
82 
83  TFile* file = TFile::Open(fileName.c_str());
84  if (file == nullptr || (!(*file).IsOpen()))
85  throw Error("unable to open data file " + fileName, 7002);
86 
87  TObject* o = file->Get(treeName.c_str());
88  if (o == nullptr)
89  throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
90 
91  TTree* events = dynamic_cast<TTree*>(o);
92  if (events == nullptr)
93  throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
94 
95  m_nEvents = events->GetEntries();
96  if (m_nEvents == 0)
97  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
98 
99  TObjArray* branches = events->GetListOfBranches();
100  if (branches == nullptr)
101  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
102 
103  const size_t n = branches->GetEntries();
104  m_branches.reserve(n);
105  for (size_t i = 0; i < n; ++i) {
106  TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
107  if (b == nullptr)
108  continue;
109  std::string const name(b->GetName());
110  if (name == "EventAux")
111  continue;
112  size_type s = getTotalSize(b);
113  m_branches.push_back(
114  BranchRecord(name, double(s[kCompressed]) / double(m_nEvents), double(s[kUncompressed]) / double(m_nEvents)));
115  }
116  std::sort(m_branches.begin(),
117  m_branches.end(),
118  boost::bind(std::greater<double>(),
119  boost::bind(&BranchRecord::compr_size, _1),
120  boost::bind(&BranchRecord::compr_size, _2)));
121  }
122 
124  std::sort(
125  m_branches.begin(),
126  m_branches.end(),
127  boost::bind(
128  std::less<std::string>(), boost::bind(&BranchRecord::name, _1), boost::bind(&BranchRecord::name, _2)));
129  }
130 
131  namespace detail {
132  // format as product:label (type)
134  size_t b = br.fullName.find('_');
135  size_t e = br.fullName.rfind('_');
136  if (b == e)
137  br.name = br.fullName;
138  else {
139  // remove type and process
140  br.name = br.fullName.substr(b + 1, e - b - 1);
141  // change label separator in :
142  e = br.name.rfind('_');
143  if (e != std::string::npos)
144  br.name.replace(e, 1, ":");
145  // add the type name
146  br.name.append(" (" + br.fullName.substr(0, b) + ")");
147  }
148  }
149 
150  } // namespace detail
151 
152  void EdmEventSize::formatNames() { std::for_each(m_branches.begin(), m_branches.end(), &detail::shorterName); }
153 
154  namespace detail {
155 
156  void dump(std::ostream& co, EdmEventSize::BranchRecord const& br) {
157  co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
158  }
159  } // namespace detail
160 
161  void EdmEventSize::dump(std::ostream& co, bool header) const {
162  if (header) {
163  co << "File " << m_fileName << " Events " << m_nEvents << "\n";
164  co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
165  }
166  std::for_each(m_branches.begin(), m_branches.end(), boost::bind(detail::dump, boost::ref(co), _1));
167  }
168 
169  namespace detail {
170 
171  struct Hist {
172  explicit Hist(int itop)
173  : top(itop),
174  uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top),
175  compressed("compressed", "branch sizes", top, -0.5, -0.5 + top),
176  cxAxis(compressed.GetXaxis()),
177  uxAxis(uncompressed.GetXaxis()),
178  x(0) {}
179 
181  if (x < top) {
182  cxAxis->SetBinLabel(x + 1, br.name.c_str());
183  uxAxis->SetBinLabel(x + 1, br.name.c_str());
184  compressed.Fill(x, br.compr_size);
185  uncompressed.Fill(x, br.uncompr_size);
186  x++;
187  }
188  }
189 
190  void finalize() {
191  double mn = std::numeric_limits<double>::max();
192  for (int i = 1; i <= top; ++i) {
193  double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
194  if (cm > 0 && cm < mn)
195  mn = cm;
196  if (um > 0 && um < mn)
197  mn = um;
198  }
199  mn *= 0.8;
200  double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
201  mx *= 1.2;
202  uncompressed.SetMinimum(mn);
203  uncompressed.SetMaximum(mx);
204  compressed.SetMinimum(mn);
205  // compressed.SetMaximum( mx );
206  cxAxis->SetLabelOffset(-0.32);
207  cxAxis->LabelsOption("v");
208  cxAxis->SetLabelSize(0.03);
209  uxAxis->SetLabelOffset(-0.32);
210  uxAxis->LabelsOption("v");
211  uxAxis->SetLabelSize(0.03);
212  compressed.GetYaxis()->SetTitle("Bytes");
213  compressed.SetFillColor(kBlue);
214  compressed.SetLineWidth(2);
215  uncompressed.GetYaxis()->SetTitle("Bytes");
216  uncompressed.SetFillColor(kRed);
217  uncompressed.SetLineWidth(2);
218  }
219 
220  int top;
223  TAxis* cxAxis;
224  TAxis* uxAxis;
225 
226  int x;
227  };
228 
229  } // namespace detail
230 
231  void EdmEventSize::produceHistos(std::string const& plot, std::string const& file, int top) const {
232  if (top == 0)
233  top = m_branches.size();
234  detail::Hist h(top);
235  std::for_each(m_branches.begin(), m_branches.end(), boost::bind(&detail::Hist::fill, boost::ref(h), _1));
236  h.finalize();
237  if (!plot.empty()) {
238  gROOT->SetStyle("Plain");
239  gStyle->SetOptStat(kFALSE);
240  gStyle->SetOptLogy();
241  TCanvas c;
242  h.uncompressed.Draw();
243  h.compressed.Draw("same");
244  c.SaveAs(plot.c_str());
245  }
246  if (!file.empty()) {
247  TFile f(file.c_str(), "RECREATE");
248  h.compressed.Write();
249  h.uncompressed.Write();
250  f.Close();
251  }
252  }
253 
254 } // namespace perftools
size
Write out results.
edm::ErrorSummaryEntry Error
FWCore Framework interface EventSetupRecordImplementation h
Helper function to determine trigger accepts.
void dump(std::ostream &co, EdmEventSize::BranchRecord const &br)
void fill(EdmEventSize::BranchRecord const &br)
void dump(std::ostream &co, bool header=true) const
dump the ascii table on "co"
void parseFile(std::string const &fileName, std::string const &treeName="Events")
read file, compute branch size, sort by size
Definition: EdmEventSize.cc:79
uint16_t size_type
Indices
Definition: EdmEventSize.cc:29
EdmEventSize()
Constructor.
Definition: EdmEventSize.cc:73
void shorterName(EdmEventSize::BranchRecord &br)
the information for each branch
Definition: EdmEventSize.h:34
double f[11][100]
double b
Definition: hdecay.h:118
void formatNames()
transform Branch names in "formatted" product identifiers
void sortAlpha()
sort by name
void produceHistos(std::string const &plot, std::string const &file, int top=0) const
produce histograms and optionally write them in "file" or as "plot"