CMS 3D CMS Logo

EdmEventSize.cc
Go to the documentation of this file.
1 
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <valarray>
12 
13 #include "Rtypes.h"
14 #include "TROOT.h"
15 #include "TFile.h"
16 #include "TTree.h"
17 #include "TStyle.h"
18 #include "TObjArray.h"
19 #include "TBranch.h"
20 #include "TH1.h"
21 #include "TCanvas.h"
22 #include "Riostream.h"
23 
24 #include "TBufferFile.h"
25 
26 namespace {
27 
28  enum Indices { kUncompressed, kCompressed };
29 
30  typedef std::valarray<Long64_t> size_type;
31 
32  size_type getBasketSize(TBranch*);
33 
34  size_type getBasketSize(TObjArray* branches) {
35  size_type result(static_cast<Long64_t>(0), 2);
36  size_t n = branches->GetEntries();
37  for (size_t i = 0; i < n; ++i) {
38  TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
39  assert(b != nullptr);
40  result += getBasketSize(b);
41  }
42  return result;
43  }
44 
45  size_type getBasketSize(TBranch* b) {
46  size_type result(static_cast<Long64_t>(0), 2);
47  if (b != nullptr) {
48  if (b->GetZipBytes() > 0) {
49  result[kUncompressed] = b->GetTotBytes();
50  result[kCompressed] = b->GetZipBytes();
51  } else {
52  result[kUncompressed] = b->GetTotalSize();
53  result[kCompressed] = b->GetTotalSize();
54  }
55  result += getBasketSize(b->GetListOfBranches());
56  }
57  return result;
58  }
59 
60  size_type getTotalSize(TBranch* br) {
61  TBufferFile buf(TBuffer::kWrite, 10000);
62  TBranch::Class()->WriteBuffer(buf, br);
63  size_type size = getBasketSize(br);
64  if (br->GetZipBytes() > 0)
65  size[kUncompressed] += buf.Length();
66  return size;
67  }
68 } // namespace
69 
70 namespace perftools {
71 
72  EdmEventSize::EdmEventSize() : m_nEvents(0) {}
73 
74  EdmEventSize::EdmEventSize(std::string const& fileName, std::string const& treeName) : m_nEvents(0) {
76  }
77 
78  void EdmEventSize::parseFile(std::string const& fileName, std::string const& treeName) {
80  m_branches.clear();
81 
82  TFile* file = TFile::Open(fileName.c_str());
83  if (file == nullptr || (!(*file).IsOpen()))
84  throw Error("unable to open data file " + fileName, 7002);
85 
86  TObject* o = file->Get(treeName.c_str());
87  if (o == nullptr)
88  throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
89 
90  TTree* events = dynamic_cast<TTree*>(o);
91  if (events == nullptr)
92  throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
93 
94  m_nEvents = events->GetEntries();
95  if (m_nEvents == 0)
96  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
97 
98  TObjArray* branches = events->GetListOfBranches();
99  if (branches == nullptr)
100  throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
101 
102  const size_t n = branches->GetEntries();
103  m_branches.reserve(n);
104  for (size_t i = 0; i < n; ++i) {
105  TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
106  if (b == nullptr)
107  continue;
108  std::string const name(b->GetName());
109  if (name == "EventAux")
110  continue;
111  size_type s = getTotalSize(b);
112  m_branches.push_back(
113  BranchRecord(name, double(s[kCompressed]) / double(m_nEvents), double(s[kUncompressed]) / double(m_nEvents)));
114  }
115  std::sort(m_branches.begin(),
116  m_branches.end(),
117  std::bind(std::greater<double>(),
118  std::bind(&BranchRecord::compr_size, std::placeholders::_1),
119  std::bind(&BranchRecord::compr_size, std::placeholders::_2)));
120  }
121 
123  std::sort(m_branches.begin(),
124  m_branches.end(),
125  std::bind(std::less<std::string>(),
126  std::bind(&BranchRecord::name, std::placeholders::_1),
127  std::bind(&BranchRecord::name, std::placeholders::_2)));
128  }
129 
130  namespace detail {
131  // format as product:label (type)
133  size_t b = br.fullName.find('_');
134  size_t e = br.fullName.rfind('_');
135  if (b == e)
136  br.name = br.fullName;
137  else {
138  // remove type and process
139  br.name = br.fullName.substr(b + 1, e - b - 1);
140  // change label separator in :
141  e = br.name.rfind('_');
142  if (e != std::string::npos)
143  br.name.replace(e, 1, ":");
144  // add the type name
145  br.name.append(" (" + br.fullName.substr(0, b) + ")");
146  }
147  }
148 
149  } // namespace detail
150 
151  void EdmEventSize::formatNames() { std::for_each(m_branches.begin(), m_branches.end(), &detail::shorterName); }
152 
153  namespace detail {
154 
155  void dump(std::ostream& co, EdmEventSize::BranchRecord const& br) {
156  co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
157  }
158  } // namespace detail
159 
160  void EdmEventSize::dump(std::ostream& co, bool header) const {
161  if (header) {
162  co << "File " << m_fileName << " Events " << m_nEvents << "\n";
163  co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
164  }
165  std::for_each(m_branches.begin(), m_branches.end(), std::bind(detail::dump, std::ref(co), std::placeholders::_1));
166  }
167 
168  namespace detail {
169 
170  struct Hist {
171  explicit Hist(int itop)
172  : top(itop),
173  uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top),
174  compressed("compressed", "branch sizes", top, -0.5, -0.5 + top),
175  cxAxis(compressed.GetXaxis()),
176  uxAxis(uncompressed.GetXaxis()),
177  x(0) {}
178 
180  if (x < top) {
181  cxAxis->SetBinLabel(x + 1, br.name.c_str());
182  uxAxis->SetBinLabel(x + 1, br.name.c_str());
183  compressed.Fill(x, br.compr_size);
184  uncompressed.Fill(x, br.uncompr_size);
185  x++;
186  }
187  }
188 
189  void finalize() {
190  double mn = std::numeric_limits<double>::max();
191  for (int i = 1; i <= top; ++i) {
192  double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
193  if (cm > 0 && cm < mn)
194  mn = cm;
195  if (um > 0 && um < mn)
196  mn = um;
197  }
198  mn *= 0.8;
199  double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
200  mx *= 1.2;
201  uncompressed.SetMinimum(mn);
202  uncompressed.SetMaximum(mx);
203  compressed.SetMinimum(mn);
204  // compressed.SetMaximum( mx );
205  cxAxis->SetLabelOffset(-0.32);
206  cxAxis->LabelsOption("v");
207  cxAxis->SetLabelSize(0.03);
208  uxAxis->SetLabelOffset(-0.32);
209  uxAxis->LabelsOption("v");
210  uxAxis->SetLabelSize(0.03);
211  compressed.GetYaxis()->SetTitle("Bytes");
212  compressed.SetFillColor(kBlue);
213  compressed.SetLineWidth(2);
214  uncompressed.GetYaxis()->SetTitle("Bytes");
215  uncompressed.SetFillColor(kRed);
216  uncompressed.SetLineWidth(2);
217  }
218 
219  int top;
222  TAxis* cxAxis;
223  TAxis* uxAxis;
224 
225  int x;
226  };
227 
228  } // namespace detail
229 
230  void EdmEventSize::produceHistos(std::string const& plot, std::string const& file, int top) const {
231  if (top == 0)
232  top = m_branches.size();
233  detail::Hist h(top);
234  std::for_each(
235  m_branches.begin(), m_branches.end(), std::bind(&detail::Hist::fill, std::ref(h), std::placeholders::_1));
236  h.finalize();
237  if (!plot.empty()) {
238  gROOT->SetStyle("Plain");
239  gStyle->SetOptStat(kFALSE);
240  gStyle->SetOptLogy();
241  TCanvas c;
242  h.uncompressed.Draw();
243  h.compressed.Draw("same");
244  c.SaveAs(plot.c_str());
245  }
246  if (!file.empty()) {
247  TFile f(file.c_str(), "RECREATE");
248  h.compressed.Write();
249  h.uncompressed.Write();
250  f.Close();
251  }
252  }
253 
254 } // namespace perftools
size
Write out results.
edm::ErrorSummaryEntry Error
void dump(std::ostream &co, EdmEventSize::BranchRecord const &br)
void fill(EdmEventSize::BranchRecord const &br)
void parseFile(std::string const &fileName, std::string const &treeName="Events")
read file, compute branch size, sort by size
Definition: EdmEventSize.cc:78
assert(be >=bs)
__host__ __device__ VT * co
Definition: prefixScan.h:47
uint16_t size_type
Indices
Definition: EdmEventSize.cc:28
EdmEventSize()
Constructor.
Definition: EdmEventSize.cc:72
void shorterName(EdmEventSize::BranchRecord &br)
the information for each branch
Definition: EdmEventSize.h:34
double f[11][100]
void dump(std::ostream &co, bool header=true) const
dump the ascii table on "co"
void produceHistos(std::string const &plot, std::string const &file, int top=0) const
produce histograms and optionally write them in "file" or as "plot"
double b
Definition: hdecay.h:120
void formatNames()
transform branch names into "formatted" product identifiers
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
void sortAlpha()
sort by name
int events