CMS 3D CMS Logo

FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 // C++ headers
5 #include <cmath>
6 #include <string>
7 #include <map>
8 #include <unordered_map>
9 #include <chrono>
10 #include <mutex>
11 #include <unistd.h>
12 
13 // boost headers
14 #include <boost/chrono.hpp>
15 
16 // tbb headers
17 #include <tbb/concurrent_unordered_set.h>
18 #include <tbb/enumerable_thread_specific.h>
19 
20 
21 // CMSSW headers
41 
42 
43 /*
44 procesing time is divided into
45  - source
46  - event processing, sum of the time spent in all the modules
47 */
48 
49 /*
50 Assuming an HLT process with ~2500 modules and ~500 paths, tracking each step (with two calls per step, to start and stop the timer)
51 with std::chrono::high_resolution_clock gives a per-event overhead of 1 ms
52 
53 Detailed informations on different timers can be extracted running $CMSSW_RELEASE_BASE/test/$SCRAM_ARCH/testChrono .
54 
55 
56 Timer per-call overhead on SLC5:
57 
58 Linux 2.6.18-371.1.2.el5 x86_64
59 glibc version: 2.5
60 clock source: unknown
61 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
62 
63 Performance of std::chrono::high_resolution_clock
64  Average time per call: 317.0 ns
65  Clock tick period: 1.0 ns
66  Measured resolution: 1000.0 ns (median: 1000.0 ns) (sigma: 199.4 ns) (average: 1007.6 +/- 0.4 ns)
67 
68 
69 Timer per-call overhead on SLC6 (virtualized):
70 
71 Linux 2.6.32-358.23.2.el6.x86_64 x86_64
72 glibc version: 2.12
73 clock source: kvm-clock
74 For each timer the resolution reported is the MINIMUM (MEDIAN) (MEAN +/- its STDDEV) of the increments measured during the test.
75 
76 Performance of std::chrono::high_resolution_clock
77  Average time per call: 351.2 ns
78  Clock tick period: 1.0 ns
79  Measured resolution: 1.0 ns (median: 358.0 ns) (sigma: 30360.8 ns) (average: 685.7 +/- 42.4 ns)
80 */
81 
82 
84 public:
87 
88 private:
89  double queryModuleTime_(edm::StreamID, unsigned int id) const;
90 
91 public:
92  // query the time spent in a module/path/process (available after it has run)
93  double querySourceTime(edm::StreamID) const;
94  double queryEventTime(edm::StreamID) const;
95  double queryEventTime(edm::StreamID, std::string const& process) const;
97  double queryModuleTime(edm::StreamID, unsigned int id) const;
98  double queryModuleTimeByLabel(edm::StreamID, std::string const& module) const;
99  double queryModuleTimeByLabel(edm::StreamID, std::string const& process, const std::string & module) const;
100  double queryPathTime(edm::StreamID, std::string const& path) const;
101  double queryPathTime(edm::StreamID, std::string const& process, std::string const& path) const;
102  double queryHighlightTime(edm::StreamID sid, std::string const& label) const;
103 
104 private:
105  void ignoredSignal(std::string signal) const;
106  void unsupportedSignal(std::string signal) const;
107 
108  // these signal pairs are not guaranteed to happen in the same thread
109 
111 
113  void postBeginJob();
114 
115  void postEndJob();
116 
119 
120  void preGlobalEndRun(edm::GlobalContext const&);
122 
125 
126  void preStreamEndRun(edm::StreamContext const&);
128 
131 
134 
137 
140 
141  void preEvent(edm::StreamContext const&);
142  void postEvent(edm::StreamContext const&);
143 
144  void prePathEvent(edm::StreamContext const&, edm::PathContext const&);
146 
149 
150  // these signal pairs are guaranteed to be called within the same thread
151 
152  //void preOpenFile(std::string const&, bool);
153  //void postOpenFile(std::string const&, bool);
154 
155  //void preCloseFile(std::string const&, bool);
156  //void postCloseFile(std::string const&, bool);
157 
159  //void postSourceConstruction(edm::ModuleDescription const&);
160 
161  void preSourceRun();
162  void postSourceRun();
163 
164  void preSourceLumi();
165  void postSourceLumi();
166 
169 
170  //void preModuleConstruction(edm::ModuleDescription const&);
171  //void postModuleConstruction(edm::ModuleDescription const&);
172 
173  //void preModuleBeginJob(edm::ModuleDescription const&);
174  //void postModuleBeginJob(edm::ModuleDescription const&);
175 
176  //void preModuleEndJob(edm::ModuleDescription const&);
177  //void postModuleEndJob(edm::ModuleDescription const&);
178 
179  //void preModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
180  //void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
181 
182  //void preModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
183  //void postModuleEndStream(edm::StreamContext const&, edm::ModuleCallingContext const&);
184 
187 
190 
193 
196 
199 
202 
205 
208 
211 
214 
217 
218 public:
219  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
220 
221 private:
222  // highlight a group of modules
223  struct GroupOfModules {
224  public:
226  std::vector<unsigned int> modules;
227  };
228 
229  // resources being monitored by the service
230  struct Resources {
231  public:
232  Resources();
233  void reset();
235  Resources operator+(Resources const& other) const;
236 
237  public:
242  };
243 
245  public:
247  void reset();
249  ResourcesPerModule operator+(ResourcesPerModule const& other) const;
250 
251  public:
253  unsigned events;
254  };
255 
257  public:
259  void reset();
261  ResourcesPerPath operator+(ResourcesPerPath const& other) const;
262 
263  public:
264  Resources active; // resources used by all modules on this path
265  Resources total; // resources used by all modules on this path, and their dependencies
266  unsigned last; // one-past-the last module that ran on this path
267  bool status; // whether the path accepted or rejected the event
268  };
269 
271  public:
273  void reset();
276 
277  public:
279  std::vector<ResourcesPerPath> paths;
280  std::vector<ResourcesPerPath> endpaths;
281  };
282 
284  public:
285  ResourcesPerJob();
286  ResourcesPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
287  void reset();
289  ResourcesPerJob operator+(ResourcesPerJob const& other) const;
290 
291  public:
293  std::vector<Resources> highlight;
294  std::vector<ResourcesPerModule> modules;
295  std::vector<ResourcesPerProcess> processes;
296  unsigned events;
297  };
298 
299 
300  // per-thread measurements
301  struct Measurement {
302  public:
303  Measurement();
304  void measure();
305  void measure_and_store(Resources & store);
306 
307  public:
308  #ifdef DEBUG_THREAD_CONCURRENCY
310  #endif // DEBUG_THREAD_CONCURRENCY
311  boost::chrono::thread_clock::time_point time_thread;
312  boost::chrono::high_resolution_clock::time_point time_real;
315  };
316 
317 
318  // plot ranges and resolution
319  struct PlotRanges {
320  double time_range;
322  double memory_range;
324  };
325 
326  // plots associated to each module or other element (path, process, etc)
328  public:
329  PlotsPerElement();
330  void reset();
331  void book(DQMStore::IBooker &, std::string const& name, std::string const& title, PlotRanges const& ranges, unsigned int lumisections, bool byls);
332  void fill(Resources const&, unsigned int lumisection);
333  void fill_fraction(Resources const&, Resources const&, unsigned int lumisection);
334 
335  private:
336  // resources spent in the module
337  TH1F * time_thread_;
338  TProfile * time_thread_byls_;
339  TH1F * time_real_;
340  TProfile * time_real_byls_;
341  TH1F * allocated_;
342  TProfile * allocated_byls_;
343  TH1F * deallocated_;
344  TProfile * deallocated_byls_;
345  };
346 
347  // plots associated to each path or endpath
348  class PlotsPerPath {
349  public:
350  PlotsPerPath();
351  void reset();
352  void book(DQMStore::IBooker &, std::string const &, ProcessCallGraph const&, ProcessCallGraph::PathType const&, PlotRanges const& ranges, unsigned int lumisections, bool byls);
353  void fill(ProcessCallGraph::PathType const&, ResourcesPerJob const&, ResourcesPerPath const&, unsigned int lumisection);
354 
355  private:
356  // resources spent in all the modules in the path, including their dependencies
358 
359  // Note:
360  // a TH1F has 7 significant digits, while a 24-hour long run could process
361  // order of 10 billion events; a 64-bit long integer would work and might
362  // be better suited than a double, but there is no "TH1L" in ROOT.
363 
364  // how many times each module and their dependencies has run
366  // resources spent in each module and their dependencies
371  };
372 
374  public:
376  void reset();
378  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
379  unsigned int lumisections, bool bypath, bool byls);
380  void fill(ProcessCallGraph::ProcessType const&, ResourcesPerJob const&, ResourcesPerProcess const&, unsigned int ls);
381 
382  private:
383  // resources spent in all the modules of the (sub)process
385  // resources spent in each path and endpath
386  std::vector<PlotsPerPath> paths_;
387  std::vector<PlotsPerPath> endpaths_;
388  };
389 
390  class PlotsPerJob {
391  public:
392  PlotsPerJob(ProcessCallGraph const& job, std::vector<GroupOfModules> const& groups);
393  void reset();
394  void book(DQMStore::IBooker &, ProcessCallGraph const&, std::vector<GroupOfModules> const&,
395  PlotRanges const& event_ranges, PlotRanges const& path_ranges,
396  PlotRanges const& module_ranges, unsigned int lumisections,
397  bool bymodule, bool bypath, bool byls);
398  void fill(ProcessCallGraph const&, ResourcesPerJob const&, unsigned int ls);
399 
400  private:
401  // resources spent in all the modules of the job
403  // resources spent in the highlighted modules
404  std::vector<PlotsPerElement> highlight_;
405  // resources spent in each module
406  std::vector<PlotsPerElement> modules_;
407  // resources spent in each (sub)process
408  std::vector<PlotsPerProcess> processes_;
409  };
410 
411 
412  // keep track of the dependencies among modules
414 
415  // per-stream information
416  std::vector<ResourcesPerJob> streams_;
417  std::vector<PlotsPerJob> stream_plots_;
418 
419  // summary data
420  ResourcesPerJob job_summary_; // whole event time accounting per-job
421  std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
422  std::mutex summary_mutex_; // synchronise access to the summary objects across different threads
423 
424  // per-thread quantities, lazily allocated
425  tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
427 
428  // atomic variables to keep track of the completion of each step, process by process
429  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;
430  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_lumisection_check_;
431  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_run_check_;
432  std::unique_ptr<std::atomic<unsigned int>[]> subprocess_global_run_check_;
433 
434  // retrieve the current thread's per-thread quantities
435  Measurement & thread();
436 
437  // job configuration
438  unsigned int concurrent_runs_;
439  unsigned int concurrent_streams_;
440  unsigned int concurrent_threads_;
441 
442  // logging configuration
443  const bool print_event_summary_; // print the time spent in each process, path and module after every event
444  const bool print_run_summary_; // print the time spent in each process, path and module for each run
445  const bool print_job_summary_; // print the time spent in each process, path and module for the whole job
446 
447  // dqm configuration
448  unsigned int module_id_; // pseudo module id for the FastTimerService, needed by the thread-safe DQMStore
449 
450  bool enable_dqm_; // non const, depends on the availability of the DQMStore
452  const bool enable_dqm_bypath_;
453  const bool enable_dqm_byls_;
455 
459  const unsigned int dqm_lumisections_range_;
461 
462  std::vector<edm::ParameterSet>
463  highlight_module_psets_; // non-const, cleared in postBeginJob()
464  std::vector<GroupOfModules> highlight_modules_; // non-const, filled in postBeginJob()
465 
466  // log unsupported signals
467  mutable tbb::concurrent_unordered_set<std::string> unsupported_signals_; // keep track of unsupported signals received
468 
469  // print the resource usage summary for en event, a run, or the while job
470  template <typename T>
471  void printHeader(T& out, std::string const & label) const;
472 
473  template <typename T>
474  void printEventHeader(T& out, std::string const & label) const;
475 
476  template <typename T>
477  void printEventLine(T& out, Resources const& data, std::string const & label) const;
478 
479  template <typename T>
480  void printEvent(T& out, ResourcesPerJob const&) const;
481 
482  template <typename T>
483  void printSummaryHeader(T& out, std::string const & label, bool detaile) const;
484 
485  template <typename T>
486  void printSummaryLine(T& out, Resources const& data, uint64_t events, std::string const& label) const;
487 
488  template <typename T>
489  void printSummaryLine(T& out, Resources const& data, uint64_t events, uint64_t active, std::string const& label) const;
490 
491  template <typename T>
492  void printSummary(T& out, ResourcesPerJob const&, std::string const& label) const;
493 
494  // check if this is the first process being signalled
497 
498  // check if this is the lest process being signalled
499  bool isLastSubprocess(std::atomic<unsigned int>& check);
500 };
501 
502 #endif // ! FastTimerService_h
void preGlobalBeginRun(edm::GlobalContext const &)
void preModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_run_summary_
static boost::mutex mutex
Definition: Proxy.cc:11
void postStreamBeginRun(edm::StreamContext const &)
double queryEventTime(edm::StreamID) const
void postGlobalEndLumi(edm::GlobalContext const &)
void postGlobalBeginLumi(edm::GlobalContext const &)
void postStreamEndLumi(edm::StreamContext const &)
std::vector< edm::ParameterSet > highlight_module_psets_
void postModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preGlobalEndRun(edm::GlobalContext const &)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
unsigned int concurrent_threads_
void postEventReadFromSource(edm::StreamContext const &, edm::ModuleCallingContext const &)
double queryHighlightTime(edm::StreamID sid, std::string const &label) const
const PlotRanges dqm_event_ranges_
void postModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< GroupOfModules > highlight_modules_
ResourcesPerJob job_summary_
std::vector< unsigned int > modules
void preGlobalEndLumi(edm::GlobalContext const &)
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
std::vector< ResourcesPerPath > endpaths
double queryModuleTime_(edm::StreamID, unsigned int id) const
bool isFirstSubprocess(edm::StreamContext const &)
const bool enable_dqm_bynproc_
unsigned int concurrent_runs_
boost::chrono::nanoseconds time_real
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_global_run_check_
std::vector< ResourcesPerJob > streams_
void postPathEvent(edm::StreamContext const &, edm::PathContext const &, edm::HLTPathStatus const &)
std::vector< PlotsPerElement > highlight_
void preModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
const bool print_job_summary_
const bool enable_dqm_bypath_
Measurement & thread()
tbb::enumerable_thread_specific< Measurement, tbb::cache_aligned_allocator< Measurement >, tbb::ets_key_per_instance > threads_
void postStreamBeginLumi(edm::StreamContext const &)
void printSummary(T &out, ResourcesPerJob const &, std::string const &label) const
const PlotRanges dqm_path_ranges_
boost::chrono::thread_clock::time_point time_thread
std::vector< ResourcesPerModule > modules
void printEvent(T &out, ResourcesPerJob const &) const
void printHeader(T &out, std::string const &label) const
void postModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::mutex summary_mutex_
std::vector< PlotsPerJob > stream_plots_
void printSummaryHeader(T &out, std::string const &label, bool detaile) const
double queryModuleTime(edm::StreamID, const edm::ModuleDescription &module) const
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
unsigned int module_id_
std::vector< ResourcesPerJob > run_summary_
void printEventHeader(T &out, std::string const &label) const
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
void preModuleStreamEndLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postEvent(edm::StreamContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_lumisection_check_
void postGlobalEndRun(edm::GlobalContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_event_check_
const unsigned int dqm_lumisections_range_
void postStreamEndRun(edm::StreamContext const &)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< PlotsPerElement > modules_
std::string dqm_path_
void printSummaryLine(T &out, Resources const &data, uint64_t events, std::string const &label) const
void printEventLine(T &out, Resources const &data, std::string const &label) const
std::vector< Resources > highlight
const bool enable_dqm_bymodule_
string ranges
Definition: diffTwoXMLs.py:78
const PlotRanges dqm_module_ranges_
std::vector< PlotsPerPath > endpaths_
const bool enable_dqm_byls_
def ls(path, rec=False)
Definition: eostools.py:348
std::vector< PlotsPerProcess > processes_
void postModuleEventDelayedGet(edm::StreamContext const &, edm::ModuleCallingContext const &)
const bool print_event_summary_
boost::chrono::high_resolution_clock::time_point time_real
unsigned long long uint64_t
Definition: Time.h:15
void postModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
double querySourceTime(edm::StreamID) const
void preModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamEndRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void postModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceEvent(edm::StreamID)
boost::chrono::nanoseconds time_thread
void preEvent(edm::StreamContext const &)
void postGlobalBeginRun(edm::GlobalContext const &)
void preStreamBeginLumi(edm::StreamContext const &)
void postSourceEvent(edm::StreamID)
void preModuleStreamBeginLumi(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndRun(edm::StreamContext const &)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
void preModuleStreamBeginRun(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preStreamEndLumi(edm::StreamContext const &)
void unsupportedSignal(std::string signal) const
void postModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preModuleGlobalEndLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void ignoredSignal(std::string signal) const
double queryPathTime(edm::StreamID, std::string const &path) const
void preGlobalBeginLumi(edm::GlobalContext const &)
void preModuleEventPrefetching(edm::StreamContext const &, edm::ModuleCallingContext const &)
void preSourceConstruction(edm::ModuleDescription const &)
void preModuleGlobalEndRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
tbb::concurrent_unordered_set< std::string > unsupported_signals_
std::vector< PlotsPerPath > paths_
void postModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::vector< ResourcesPerProcess > processes
double queryModuleTimeByLabel(edm::StreamID, std::string const &module) const
boost::date_time::subsecond_duration< boost::posix_time::time_duration, 1000000000 > nanoseconds
def check(config)
Definition: trackerTree.py:14
ProcessCallGraph callgraph_
long double T
void postModuleGlobalBeginLumi(edm::GlobalContext const &, edm::ModuleCallingContext const &)
void preStreamBeginRun(edm::StreamContext const &)
void reset(double vett[256])
Definition: TPedValues.cc:11
std::vector< ResourcesPerPath > paths
Definition: vlib.h:208
void preModuleGlobalBeginRun(edm::GlobalContext const &, edm::ModuleCallingContext const &)
std::unique_ptr< std::atomic< unsigned int >[]> subprocess_run_check_
unsigned int concurrent_streams_
bool isLastSubprocess(std::atomic< unsigned int > &check)
void preallocate(edm::service::SystemBounds const &)
Basic3DVector & operator+=(const Basic3DVector< U > &p)