CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
FastTimerService.h
Go to the documentation of this file.
1 #ifndef FastTimerService_h
2 #define FastTimerService_h
3 
4 //system headers
5 #ifdef __linux
6 #include <time.h>
7 #else
8 typedef int clockid_t;
9 #endif
10 
11 /* Darwin system headers */
12 #if defined(__APPLE__) || defined(__MACH__)
13 #include <mach/clock.h>
14 #include <mach/mach.h>
15 #endif // defined(__APPLE__) || defined(__MACH__)
16 
17 // C++ headers
18 #include <cmath>
19 #include <string>
20 #include <map>
21 #include <unordered_map>
22 #include <unistd.h>
23 
24 // CMSSW headers
36 
37 
38 /*
39 procesing time is diveded into
40  - source
41  - pre-event processing overhead
42  - event processing
43  - post-event processing overhead
44 
45 until lumi-processing and run-processing are taken into account, they will count as inter-event overhead
46 
47 event processing time is diveded into
48  - trigger processing (from the begin of the first path to the end of the last path)
49  - trigger overhead
50  - endpath processing (from the begin of the first endpath to the end of the last endpath)
51  - endpath overhead
52 */
53 
54 /*
55 Timer per-call overhead (on lxplus, SLC 5.7):
56 
57 Test results for "clock_gettime(CLOCK_THREAD_CPUTIME_ID, & value)"
58  - average time per call: 340 ns
59  - resolution: ? ns
60 
61 Test results for "clock_gettime(CLOCK_PROCESS_CPUTIME_ID, & value)"
62  - average time per call: 360 ns
63  - resolution: ? ns
64 
65 Test results for "clock_gettime(CLOCK_REALTIME, & value)"
66  - average time per call: 320 ns
67  - resolution: 1 us
68 
69 Test results for "getrusage(RUSAGE_SELF, & value)"
70  - average time per call: 380 ns
71  - resolution: 1 ms
72 
73 Test results for "gettimeofday(& value, NULL)"
74  - average time per call: 65 ns
75  - resolution: 1 us
76 
77 Assuming an HLT process with ~2500 modules and ~500 paths, tracking each step
78 with clock_gettime(CLOCK_THREAD_CPUTIME_ID) gives a per-event overhead of 2 ms
79 
80 Detailed informations on different timers can be extracted running $CMSSW_RELEASE_BASE/test/$SCRAM_ARCH/testTimer .
81 */
82 
83 
85 public:
88 
89  // query the current module/path/event
90  // Note: these functions incur in a "per-call timer overhead" (see above), currently of the order of 340ns
91  double currentModuleTime() const; // return the time spent since the last preModule() event
92  double currentPathTime() const; // return the time spent since the last preProcessPath() event
93  double currentEventTime() const; // return the time spent since the last preProcessEvent() event
94 
95  // query the time spent in a module/path (available after it has run)
96  double queryModuleTime(const edm::ModuleDescription &) const;
97  double queryModuleTimeByLabel(const std::string &) const;
98  double queryModuleTimeByType(const std::string &) const;
99  double queryPathActiveTime(const std::string &) const;
100  double queryPathTotalTime(const std::string &) const;
101 
102  // query the time spent in the current event's
103  // - source (available during event processing)
104  // - all paths (available during endpaths)
105  // - all endpaths (available after all endpaths have run)
106  // - processing (available after the event has been processed)
107  double querySourceTime() const;
108  double queryPathsTime() const;
109  double queryEndPathsTime() const;
110  double queryEventTime() const;
111 
112  /* FIXME not yet implemented
113  // try to assess the overhead which may not be included in the source, paths and event timers
114  double queryPreSourceOverhead() const; // time spent after the previous event's postProcessEvent and this event's preSource
115  double queryPreEventOverhead() const; // time spent after this event's postSource and preProcessEvent
116  double queryPreEndPathsOverhead() const; // time spent after the last path's postProcessPath and the first endpath's preProcessPath
117  */
118 
119  // optionally called to specify the number of concurrent event processors
120  void setNumberOfProcesses(unsigned int);
121 
122 private:
123  void postEndJob();
124  void preBeginRun( edm::RunID const &, edm::Timestamp const & );
126  void preProcessEvent( edm::EventID const &, edm::Timestamp const & );
127  void postProcessEvent( edm::Event const &, edm::EventSetup const & );
128  void preSource();
129  void postSource();
130  void prePathBeginRun( std::string const & );
131  void preProcessPath( std::string const & );
132  void postProcessPath( std::string const &, edm::HLTPathStatus const & );
133  void preModule( edm::ModuleDescription const & );
134  void postModule( edm::ModuleDescription const & );
135 
136  // needed for the DAQ when reconfiguring between runs
137  void reset();
138 
139 public:
140  static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
141 
142 private:
143 
144  struct ModuleInfo {
145  double time_active; // per-event timer: time actually spent in this module
147  TH1F * dqm_active;
148  bool has_just_run; // flag set to check if a module was active inside a particular path, or not
149  bool is_exclusive; // flag set to check if a module has been run only once
150 
151  public:
153  time_active(0.),
154  summary_active(0.),
155  dqm_active(0),
158  { }
159 
161  reset();
162  }
163 
164  // reset the timers and DQM plots
165  void reset() {
166  time_active = 0.;
167  summary_active = 0.;
168  // the DAQ destroys and re-creates the DQM and DQMStore services at each reconfigure, so we don't need to clean them up
169  dqm_active = 0;
170  has_just_run = false;
171  is_exclusive = false;
172  }
173  };
174 
175  struct PathInfo {
176  std::vector<ModuleInfo *> modules; // list of all modules contributing to the path (duplicate modules stored as null pointers)
177  double time_active; // per-event timer: time actually spent in this path
178  double time_premodules; // per-event timer: time spent between "begin path" and the first "begin module"
179  double time_intermodules; // per-event timer: time spent between modules
180  double time_postmodules; // per-event timer: time spent between the last "end module" and "end path"
181  double time_overhead; // per-event timer: sum of time_premodules, time_intermodules, time_postmodules
182  double time_total; // per-event timer: sum of the time spent in all modules which would have run in this path (plus overhead)
189  uint32_t last_run;
190  uint32_t index; // index of the Path or EndPath in the "schedule"
191  TH1F * dqm_active;
196  TH1F * dqm_overhead;
197  TH1F * dqm_total;
198  TH1F * dqm_module_counter; // for each module in the path, track how many times it ran
199  TH1F * dqm_module_active; // for each module in the path, track the active time spent
200  TH1F * dqm_module_total; // for each module in the path, track the total time spent
201 
202  public:
204  modules(),
205  time_active(0.),
206  time_premodules(0.),
207  time_intermodules(0.),
208  time_postmodules(0.),
209  time_overhead(0.),
210  time_total(0.),
211  summary_active(0.),
212  summary_premodules(0.),
215  summary_overhead(0.),
216  summary_total(0.),
217  last_run(0),
218  index(0),
219  dqm_active(0),
220  dqm_exclusive(0),
221  dqm_premodules(0),
222  dqm_intermodules(0),
223  dqm_postmodules(0),
224  dqm_overhead(0),
225  dqm_total(0),
229  { }
230 
232  reset();
233  }
234 
235  // reset the timers and DQM plots
236  void reset() {
237  modules.clear();
238  time_active = 0.;
239  time_premodules = 0.;
240  time_intermodules = 0.;
241  time_postmodules = 0.;
242  time_overhead = 0.;
243  time_total = 0.;
244  summary_active = 0.;
245  summary_premodules = 0.;
247  summary_postmodules = 0.;
248  summary_overhead = 0.;
249  summary_total = 0.;
250  last_run = 0;
251  index = 0;
252 
253  // the DAQ destroys and re-creates the DQM and DQMStore services at each reconfigure, so we don't need to clean them up
254  dqm_active = 0;
255  dqm_premodules = 0;
256  dqm_intermodules = 0;
257  dqm_postmodules = 0;
258  dqm_overhead = 0;
259  dqm_total = 0;
260  dqm_module_counter = 0;
261  dqm_module_active = 0;
262  dqm_module_total = 0;
263  }
264  };
265 
266  template <typename T> class PathMap : public std::unordered_map<std::string, T> {};
267  template <typename T> class ModuleMap : public std::unordered_map<edm::ModuleDescription const *, T> {};
268 
269  // timer configuration
270  const clockid_t m_timer_id; // the default is to use CLOCK_THREAD_CPUTIME_ID, unless useRealTimeClock is set, which will use CLOCK_REALTIME
271  bool m_is_cpu_bound; // if the process is not bound to a single CPU, per-thread or per-process measuerements may be unreliable
276  const bool m_skip_first_path;
277 
278  // dqm configuration
279  const bool m_enable_dqm;
280  const bool m_enable_dqm_bypath_active; // require per-path timers
281  const bool m_enable_dqm_bypath_total; // require per-path and per-module timers
282  const bool m_enable_dqm_bypath_overhead; // require per-path and per-module timers
283  const bool m_enable_dqm_bypath_details; // require per-path and per-module timers
286  const bool m_enable_dqm_bymodule; // require per-module timers
287  const bool m_enable_dqm_bymoduletype; // require per-module timers
290  const bool m_enable_dqm_byls;
292  bool m_nproc_enabled; // check if the plots by number of processes have been correctly enabled
293  const double m_dqm_eventtime_range;
295  const double m_dqm_pathtime_range;
301  const uint32_t m_dqm_ls_range;
302  const std::string m_dqm_path;
303  const edm::InputTag m_luminosity_label; // label of the per-Event luminosity EDProduct
304  const std::vector<unsigned int> m_supported_processes; // possible number of concurrent processes
305 
306  // job configuration and caching
307  std::string const * m_first_path; // the framework does not provide a pre-paths or pre-endpaths signal,
308  std::string const * m_last_path; // so we emulate them keeping track of the first and last path and endpath
309  std::string const * m_first_endpath;
310  std::string const * m_last_endpath;
311  bool m_is_first_module; // helper to measure the time spent between the beginning of the path and the execution of the first module
312 
313  // per-event accounting
314  double m_event;
315  double m_source;
316  double m_all_paths;
318  double m_interpaths;
319 
320  // per-job summary
321  unsigned int m_summary_events; // number of events
327 
328  // DQM
330 
331  // event summary plots
332  TH1F * m_dqm_event;
333  TH1F * m_dqm_source;
337 
338  // event summary plots - summed over nodes with the same number of processes
344 
345  // plots by path
350 
351  // plots per lumisection
352  TProfile * m_dqm_byls_event;
353  TProfile * m_dqm_byls_source;
357 
358  // plots per lumisection - summed over nodes with the same number of processes
364 
365  // plots vs. instantaneous luminosity
371 
372  // plots vs. instantaneous luminosity - summed over nodes with the same number of processes
378 
379  // per-path, per-module and per-module-type accounting
382  std::unordered_map<std::string, ModuleInfo> m_modules;
383  std::unordered_map<std::string, ModuleInfo> m_moduletypes;
384  ModuleMap<ModuleInfo *> m_fast_modules; // these assume that ModuleDescription are stored in the same object through the whole job,
385  ModuleMap<ModuleInfo *> m_fast_moduletypes; // which is true only *after* the edm::Worker constructors have run
386 
387  // timers
388  std::pair<struct timespec, struct timespec> m_timer_event; // track time spent in each event
389  std::pair<struct timespec, struct timespec> m_timer_source; // track time spent in the source
390  std::pair<struct timespec, struct timespec> m_timer_paths; // track time spent in all paths
391  std::pair<struct timespec, struct timespec> m_timer_endpaths; // track time spent in all endpaths
392  std::pair<struct timespec, struct timespec> m_timer_path; // track time spent in each path
393  std::pair<struct timespec, struct timespec> m_timer_module; // track time spent in each module
394  struct timespec m_timer_first_module; // record the start of the first active module in a path, if any
395 
396 #if defined(__APPLE__) || defined (__MACH__)
397  clock_serv_t m_clock_port;
398 #endif // defined(__APPLE__) || defined(__MACH__)
399 
400  void gettime(struct timespec & stamp) const
401  {
402 #if defined(_POSIX_TIMERS) && _POSIX_TIMERS >= 0
403  clock_gettime(m_timer_id, & stamp);
404 #else
405 // special cases which do not support _POSIX_TIMERS
406 #if defined(__APPLE__) || defined (__MACH__)
407  mach_timespec_t timespec;
408  clock_get_time(m_clock_port, &timespec);
409  stamp.tv_sec = timespec.tv_sec;
410  stamp.tv_nsec = timespec.tv_nsec;
411 #endif // defined(__APPLE__) || defined(__MACH__)
412 #endif // defined(_POSIX_TIMERS) && _POSIX_TIMERS >= 0
413  }
414 
415  void start(std::pair<struct timespec, struct timespec> & times) const
416  {
417  gettime(times.first);
418  }
419 
420  void stop(std::pair<struct timespec, struct timespec> & times) const
421  {
422  gettime(times.second);
423  }
424 
425  static
426  double delta(const struct timespec & first, const struct timespec & second)
427  {
428  if (second.tv_nsec > first.tv_nsec)
429  return (double) (second.tv_sec - first.tv_sec) + (double) (second.tv_nsec - first.tv_nsec) / (double) 1e9;
430  else
431  return (double) (second.tv_sec - first.tv_sec) - (double) (first.tv_nsec - second.tv_nsec) / (double) 1e9;
432  }
433 
434  static
435  double delta(const std::pair<struct timespec, struct timespec> & times)
436  {
437  return delta(times.first, times.second);
438  }
439 
440  // associate to a path all the modules it contains
441  void fillPathMap(std::string const & name, std::vector<std::string> const & modules);
442 
443 };
444 
445 #endif // ! FastTimerService_h
void prePathBeginRun(std::string const &)
std::pair< struct timespec, struct timespec > m_timer_path
const double m_dqm_eventtime_resolution
TProfile * m_dqm_byls_all_endpaths
const bool m_enable_dqm_byluminosity
const uint32_t m_dqm_ls_range
double queryModuleTimeByType(const std::string &) const
ModuleMap< ModuleInfo * > m_fast_moduletypes
TProfile * m_dqm_byls_source
TProfile * m_dqm_nproc_byls_all_paths
double queryEventTime() const
std::pair< struct timespec, struct timespec > m_timer_event
const clockid_t m_timer_id
const double m_dqm_luminosity_resolution
double queryModuleTimeByLabel(const std::string &) const
TProfile * m_dqm_byluminosity_interpaths
TProfile * m_dqm_nproc_byluminosity_source
void preProcessEvent(edm::EventID const &, edm::Timestamp const &)
const bool m_enable_dqm_bypath_counters
void postProcessEvent(edm::Event const &, edm::EventSetup const &)
const bool m_skip_first_path
std::string const * m_last_path
TProfile * m_dqm_nproc_byls_interpaths
void postModule(edm::ModuleDescription const &)
const bool m_enable_dqm_bypath_exclusive
double queryPathTotalTime(const std::string &) const
std::unordered_map< std::string, ModuleInfo > m_moduletypes
static double delta(const std::pair< struct timespec, struct timespec > &times)
const double m_dqm_pathtime_range
std::string const * m_first_endpath
const edm::InputTag m_luminosity_label
PathInfo * m_current_path
void setNumberOfProcesses(unsigned int)
std::pair< struct timespec, struct timespec > m_timer_endpaths
double queryEndPathsTime() const
TProfile * m_dqm_nproc_byluminosity_event
U second(std::pair< T, U > const &p)
TProfile * m_dqm_nproc_byluminosity_all_paths
TProfile * m_dqm_byls_interpaths
void preModuleBeginJob(edm::ModuleDescription const &)
const bool m_enable_dqm_bynproc
double querySourceTime() const
void stop(std::pair< struct timespec, struct timespec > &times) const
std::pair< struct timespec, struct timespec > m_timer_paths
const bool m_enable_dqm_byls
int clockid_t
const double m_dqm_moduletime_resolution
TProfile * m_dqm_byluminosity_source
void preModule(edm::ModuleDescription const &)
const bool m_enable_dqm
TProfile * m_dqm_nproc_byluminosity_all_endpaths
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry &)
std::string const * m_first_path
TProfile * m_dqm_nproc_byls_source
TProfile * m_dqm_nproc_byls_all_endpaths
std::string const * m_last_endpath
double queryPathActiveTime(const std::string &) const
void start(std::pair< struct timespec, struct timespec > &times) const
PathMap< PathInfo > m_paths
struct timespec m_timer_first_module
const bool m_enable_dqm_bypath_details
const bool m_enable_dqm_bypath_overhead
bool first
Definition: L1TdeRCT.cc:94
TProfile * m_dqm_nproc_byls_event
const std::string m_dqm_path
TProfile * m_dqm_byluminosity_all_endpaths
std::unordered_map< std::string, ModuleInfo > m_modules
ModuleMap< ModuleInfo * > m_fast_modules
const bool m_enable_dqm_bymodule
const double m_dqm_moduletime_range
TProfile * m_dqm_byluminosity_all_paths
double queryModuleTime(const edm::ModuleDescription &) const
TProfile * m_dqm_paths_exclusive_time
double currentModuleTime() const
TProfile * m_dqm_byls_all_paths
double queryPathsTime() const
const bool m_enable_timing_summary
const bool m_enable_dqm_bymoduletype
std::pair< struct timespec, struct timespec > m_timer_module
const bool m_enable_dqm_summary
TProfile * m_dqm_byls_event
void gettime(struct timespec &stamp) const
const bool m_enable_dqm_bypath_total
void fillPathMap(std::string const &name, std::vector< std::string > const &modules)
const bool m_enable_dqm_bypath_active
unsigned int m_summary_events
void postProcessPath(std::string const &, edm::HLTPathStatus const &)
const std::vector< unsigned int > m_supported_processes
TProfile * m_dqm_byluminosity_event
std::vector< ModuleInfo * > modules
TProfile * m_dqm_paths_active_time
const double m_dqm_luminosity_range
TH1F * m_dqm_nproc_all_endpaths
TProfile * m_dqm_paths_total_time
TProfile * m_dqm_nproc_byluminosity_interpaths
double currentEventTime() const
void preProcessPath(std::string const &)
std::pair< struct timespec, struct timespec > m_timer_source
static double delta(const struct timespec &first, const struct timespec &second)
TProfile * m_dqm_paths_interpaths
void preBeginRun(edm::RunID const &, edm::Timestamp const &)
const double m_dqm_eventtime_range
const double m_dqm_pathtime_resolution
double currentPathTime() const