CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/HLTrigger/Timer/interface/FastTimerService.h

Go to the documentation of this file.
00001 #ifndef FastTimerService_h
00002 #define FastTimerService_h
00003 
00004 //system headers
00005 #ifdef __linux
00006 #include <time.h>
00007 #else
00008 typedef int clockid_t;
00009 #endif
00010 
00011 /* Darwin system headers */
00012 #if defined(__APPLE__) || defined(__MACH__)
00013 #include <mach/clock.h>
00014 #include <mach/mach.h>
00015 #endif // defined(__APPLE__) || defined(__MACH__)
00016 
00017 // C++ headers
00018 #include <cmath>
00019 #include <string>
00020 #include <map>
00021 #include <unordered_map>
00022 #include <unistd.h>
00023 
00024 // CMSSW headers
00025 #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
00026 #include "FWCore/ServiceRegistry/interface/Service.h"
00027 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00028 #include "FWCore/Framework/interface/TriggerNamesService.h"
00029 #include "FWCore/MessageLogger/interface/MessageLogger.h"
00030 #include "DataFormats/Common/interface/HLTPathStatus.h"
00031 #include "DataFormats/Provenance/interface/EventID.h"
00032 #include "DataFormats/Provenance/interface/Timestamp.h"
00033 #include "DataFormats/Provenance/interface/ModuleDescription.h"
00034 #include "DQMServices/Core/interface/DQMStore.h"
00035 #include "DQMServices/Core/interface/MonitorElement.h"
00036 
00037 
00038 /*
00039 procesing time is diveded into
00040  - source
00041  - pre-event processing overhead
00042  - event processing
00043  - post-event processing overhead
00044 
00045 until lumi-processing and run-processing are taken into account, they will count as inter-event overhead
00046 
00047 event processing time is diveded into
00048  - trigger processing (from the begin of the first path to the end of the last path)
00049  - trigger overhead
00050  - endpath processing (from the begin of the first endpath to the end of the last endpath)
00051  - endpath overhead
00052 */
00053 
00054 /*
00055 Timer per-call overhead (on lxplus, SLC 5.7):
00056 
00057 Test results for "clock_gettime(CLOCK_THREAD_CPUTIME_ID, & value)"
00058  - average time per call: 340 ns
00059  - resolution:              ? ns
00060 
00061 Test results for "clock_gettime(CLOCK_PROCESS_CPUTIME_ID, & value)"
00062  - average time per call: 360 ns
00063  - resolution:              ? ns
00064 
00065 Test results for "clock_gettime(CLOCK_REALTIME, & value)"
00066  - average time per call: 320 ns
00067  - resolution:              1 us
00068 
00069 Test results for "getrusage(RUSAGE_SELF, & value)"
00070  - average time per call: 380 ns
00071  - resolution:              1 ms
00072 
00073 Test results for "gettimeofday(& value, NULL)"
00074  - average time per call:  65 ns
00075  - resolution:              1 us
00076 
00077 Assuming an HLT process with ~2500 modules and ~500 paths, tracking each step
00078 with clock_gettime(CLOCK_THREAD_CPUTIME_ID) gives a per-event overhead of 2 ms
00079 
00080 Detailed informations on different timers can be extracted running $CMSSW_RELEASE_BASE/test/$SCRAM_ARCH/testTimer .
00081 */
00082 
00083 
00084 class FastTimerService {
00085 public:
00086   FastTimerService(const edm::ParameterSet &, edm::ActivityRegistry & );
00087   ~FastTimerService();
00088 
00089   // query the current module/path/event
00090   // Note: these functions incur in a "per-call timer overhead" (see above), currently of the order of 340ns
00091   double currentModuleTime() const;         // return the time spent since the last preModule() event
00092   double currentPathTime() const;           // return the time spent since the last preProcessPath() event
00093   double currentEventTime() const;          // return the time spent since the last preProcessEvent() event
00094 
00095   // query the time spent in a module/path (available after it has run)
00096   double queryModuleTime(const edm::ModuleDescription &) const;
00097   double queryModuleTimeByLabel(const std::string &) const;
00098   double queryModuleTimeByType(const std::string &) const;
00099   double queryPathActiveTime(const std::string &) const;
00100   double queryPathTotalTime(const std::string &) const;
00101 
00102   // query the time spent in the current event's
00103   //  - source        (available during event processing)
00104   //  - all paths     (available during endpaths)
00105   //  - all endpaths  (available after all endpaths have run)
00106   //  - processing    (available after the event has been processed)
00107   double querySourceTime() const;
00108   double queryPathsTime() const;
00109   double queryEndPathsTime() const;
00110   double queryEventTime() const;
00111 
00112   /* FIXME not yet implemented
00113   // try to assess the overhead which may not be included in the source, paths and event timers
00114   double queryPreSourceOverhead() const;    // time spent after the previous event's postProcessEvent and this event's preSource
00115   double queryPreEventOverhead() const;     // time spent after this event's postSource and preProcessEvent
00116   double queryPreEndPathsOverhead() const;  // time spent after the last path's postProcessPath and the first endpath's preProcessPath
00117   */
00118 
00119   // optionally called to specify the number of concurrent event processors
00120   void setNumberOfProcesses(unsigned int);
00121 
00122 private:
00123   void postEndJob();
00124   void preBeginRun( edm::RunID const &, edm::Timestamp const & );
00125   void preModuleBeginJob( edm::ModuleDescription const & );
00126   void preProcessEvent( edm::EventID const &, edm::Timestamp const & );
00127   void postProcessEvent( edm::Event const &, edm::EventSetup const & );
00128   void preSource();
00129   void postSource();
00130   void prePathBeginRun( std::string const & );
00131   void preProcessPath(  std::string const & );
00132   void postProcessPath( std::string const &, edm::HLTPathStatus const & );
00133   void preModule(  edm::ModuleDescription const & );
00134   void postModule( edm::ModuleDescription const & );
00135 
00136   // needed for the DAQ when reconfiguring between runs
00137   void reset();
00138 
00139 public:
00140   static void fillDescriptions(edm::ConfigurationDescriptions & descriptions);
00141 
00142 private:
00143 
00144   struct ModuleInfo {
00145     double                      time_active;        // per-event timer: time actually spent in this module
00146     double                      summary_active;
00147     TH1F *                      dqm_active;
00148     bool                        has_just_run;       // flag set to check if a module was active inside a particular path, or not
00149     bool                        is_exclusive;       // flag set to check if a module has been run only once
00150 
00151   public:
00152     ModuleInfo() :
00153       time_active(0.),
00154       summary_active(0.),
00155       dqm_active(0),
00156       has_just_run(false),
00157       is_exclusive(false)
00158     { }
00159 
00160     ~ModuleInfo() {
00161       reset();
00162     }
00163 
00164     // reset the timers and DQM plots
00165     void reset() {
00166       time_active = 0.;
00167       summary_active = 0.;
00168       // the DAQ destroys and re-creates the DQM and DQMStore services at each reconfigure, so we don't need to clean them up
00169       dqm_active = 0;
00170       has_just_run = false;
00171       is_exclusive = false;
00172     }
00173   };
00174 
00175   struct PathInfo {
00176     std::vector<ModuleInfo *>   modules;            // list of all modules contributing to the path (duplicate modules stored as null pointers)
00177     double                      time_active;        // per-event timer: time actually spent in this path
00178     double                      time_premodules;    // per-event timer: time spent between "begin path" and the first "begin module"
00179     double                      time_intermodules;  // per-event timer: time spent between modules
00180     double                      time_postmodules;   // per-event timer: time spent between the last "end module" and "end path"
00181     double                      time_overhead;      // per-event timer: sum of time_premodules, time_intermodules, time_postmodules
00182     double                      time_total;         // per-event timer: sum of the time spent in all modules which would have run in this path (plus overhead)
00183     double                      summary_active;
00184     double                      summary_premodules;
00185     double                      summary_intermodules;
00186     double                      summary_postmodules;
00187     double                      summary_overhead;
00188     double                      summary_total;
00189     uint32_t                    last_run;
00190     uint32_t                    index;              // index of the Path or EndPath in the "schedule"
00191     TH1F *                      dqm_active;
00192     TH1F *                      dqm_exclusive;
00193     TH1F *                      dqm_premodules;
00194     TH1F *                      dqm_intermodules;
00195     TH1F *                      dqm_postmodules;
00196     TH1F *                      dqm_overhead;
00197     TH1F *                      dqm_total;
00198     TH1F *                      dqm_module_counter; // for each module in the path, track how many times it ran
00199     TH1F *                      dqm_module_active;  // for each module in the path, track the active time spent 
00200     TH1F *                      dqm_module_total;   // for each module in the path, track the total time spent 
00201 
00202   public:
00203     PathInfo() :
00204       modules(),
00205       time_active(0.),
00206       time_premodules(0.),
00207       time_intermodules(0.),
00208       time_postmodules(0.),
00209       time_overhead(0.),
00210       time_total(0.),
00211       summary_active(0.),
00212       summary_premodules(0.),
00213       summary_intermodules(0.),
00214       summary_postmodules(0.),
00215       summary_overhead(0.),
00216       summary_total(0.),
00217       last_run(0),
00218       index(0),
00219       dqm_active(0),
00220       dqm_exclusive(0),
00221       dqm_premodules(0),
00222       dqm_intermodules(0),
00223       dqm_postmodules(0),
00224       dqm_overhead(0),
00225       dqm_total(0),
00226       dqm_module_counter(0),
00227       dqm_module_active(0),
00228       dqm_module_total(0)
00229     { }
00230 
00231     ~PathInfo() {
00232       reset();
00233     }
00234 
00235     // reset the timers and DQM plots
00236     void reset() {
00237       modules.clear();
00238       time_active = 0.;
00239       time_premodules = 0.;
00240       time_intermodules = 0.;
00241       time_postmodules = 0.;
00242       time_overhead = 0.;
00243       time_total = 0.;
00244       summary_active = 0.;
00245       summary_premodules = 0.;
00246       summary_intermodules = 0.;
00247       summary_postmodules = 0.;
00248       summary_overhead = 0.;
00249       summary_total = 0.;
00250       last_run = 0;
00251       index = 0;
00252 
00253       // the DAQ destroys and re-creates the DQM and DQMStore services at each reconfigure, so we don't need to clean them up
00254       dqm_active = 0; 
00255       dqm_premodules = 0;
00256       dqm_intermodules = 0;
00257       dqm_postmodules = 0;
00258       dqm_overhead = 0;
00259       dqm_total = 0;
00260       dqm_module_counter = 0;
00261       dqm_module_active = 0;
00262       dqm_module_total = 0;
00263     }
00264   };
00265 
00266   template <typename T> class PathMap   : public std::unordered_map<std::string, T> {};
00267   template <typename T> class ModuleMap : public std::unordered_map<edm::ModuleDescription const *, T> {};
00268 
00269   // timer configuration
00270   const clockid_t                               m_timer_id;             // the default is to use CLOCK_THREAD_CPUTIME_ID, unless useRealTimeClock is set, which will use CLOCK_REALTIME
00271   bool                                          m_is_cpu_bound;         // if the process is not bound to a single CPU, per-thread or per-process measuerements may be unreliable
00272   bool                                          m_enable_timing_paths;
00273   bool                                          m_enable_timing_modules;
00274   bool                                          m_enable_timing_exclusive;
00275   const bool                                    m_enable_timing_summary;
00276   const bool                                    m_skip_first_path;
00277 
00278   // dqm configuration
00279   const bool                                    m_enable_dqm;
00280   const bool                                    m_enable_dqm_bypath_active;     // require per-path timers
00281   const bool                                    m_enable_dqm_bypath_total;      // require per-path and per-module timers
00282   const bool                                    m_enable_dqm_bypath_overhead;   // require per-path and per-module timers
00283   const bool                                    m_enable_dqm_bypath_details;    // require per-path and per-module timers
00284   const bool                                    m_enable_dqm_bypath_counters;
00285   const bool                                    m_enable_dqm_bypath_exclusive;
00286   const bool                                    m_enable_dqm_bymodule;          // require per-module timers
00287   const bool                                    m_enable_dqm_bymoduletype;      // require per-module timers
00288   const bool                                    m_enable_dqm_summary;
00289   const bool                                    m_enable_dqm_byluminosity;
00290   const bool                                    m_enable_dqm_byls;
00291   const bool                                    m_enable_dqm_bynproc;
00292   bool                                          m_nproc_enabled;                // check if the plots by number of processes have been correctly enabled
00293   const double                                  m_dqm_eventtime_range;
00294   const double                                  m_dqm_eventtime_resolution;
00295   const double                                  m_dqm_pathtime_range;
00296   const double                                  m_dqm_pathtime_resolution;
00297   const double                                  m_dqm_moduletime_range;
00298   const double                                  m_dqm_moduletime_resolution;
00299   const double                                  m_dqm_luminosity_range;
00300   const double                                  m_dqm_luminosity_resolution;
00301   const uint32_t                                m_dqm_ls_range;
00302   const std::string                             m_dqm_path;
00303   const edm::InputTag                           m_luminosity_label;     // label of the per-Event luminosity EDProduct
00304   const std::vector<unsigned int>               m_supported_processes;  // possible number of concurrent processes
00305 
00306   // job configuration and caching
00307   std::string const *                           m_first_path;           // the framework does not provide a pre-paths or pre-endpaths signal,
00308   std::string const *                           m_last_path;            // so we emulate them keeping track of the first and last path and endpath
00309   std::string const *                           m_first_endpath;
00310   std::string const *                           m_last_endpath;
00311   bool                                          m_is_first_module;      // helper to measure the time spent between the beginning of the path and the execution of the first module
00312   bool                                          m_is_first_event;
00313 
00314   // per-event accounting
00315   double                                        m_event;
00316   double                                        m_presource;
00317   double                                        m_source;
00318   double                                        m_postsource;
00319   double                                        m_all_paths;
00320   double                                        m_all_endpaths;
00321   double                                        m_interpaths;
00322 
00323   // per-job summary
00324   unsigned int                                  m_summary_events;       // number of events
00325   double                                        m_summary_event;
00326   double                                        m_summary_presource;
00327   double                                        m_summary_source;
00328   double                                        m_summary_postsource;
00329   double                                        m_summary_all_paths;
00330   double                                        m_summary_all_endpaths;
00331   double                                        m_summary_interpaths;
00332 
00333   // DQM
00334   DQMStore *                                    m_dqms;
00335 
00336   // event summary plots
00337   TH1F *                                        m_dqm_event;
00338   TH1F *                                        m_dqm_presource;
00339   TH1F *                                        m_dqm_source;
00340   TH1F *                                        m_dqm_postsource;
00341   TH1F *                                        m_dqm_all_paths;
00342   TH1F *                                        m_dqm_all_endpaths;
00343   TH1F *                                        m_dqm_interpaths;
00344 
00345   // event summary plots - summed over nodes with the same number of processes
00346   TH1F *                                        m_dqm_nproc_event;
00347   TH1F *                                        m_dqm_nproc_presource;
00348   TH1F *                                        m_dqm_nproc_source;
00349   TH1F *                                        m_dqm_nproc_postsource;
00350   TH1F *                                        m_dqm_nproc_all_paths;
00351   TH1F *                                        m_dqm_nproc_all_endpaths;
00352   TH1F *                                        m_dqm_nproc_interpaths;
00353 
00354   // plots by path
00355   TProfile *                                    m_dqm_paths_active_time;
00356   TProfile *                                    m_dqm_paths_total_time;
00357   TProfile *                                    m_dqm_paths_exclusive_time;
00358   TProfile *                                    m_dqm_paths_interpaths;
00359 
00360   // plots per lumisection
00361   TProfile *                                    m_dqm_byls_event;
00362   TProfile *                                    m_dqm_byls_presource;
00363   TProfile *                                    m_dqm_byls_source;
00364   TProfile *                                    m_dqm_byls_postsource;
00365   TProfile *                                    m_dqm_byls_all_paths;
00366   TProfile *                                    m_dqm_byls_all_endpaths;
00367   TProfile *                                    m_dqm_byls_interpaths;
00368 
00369   // plots per lumisection - summed over nodes with the same number of processes
00370   TProfile *                                    m_dqm_nproc_byls_event;
00371   TProfile *                                    m_dqm_nproc_byls_presource;
00372   TProfile *                                    m_dqm_nproc_byls_source;
00373   TProfile *                                    m_dqm_nproc_byls_postsource;
00374   TProfile *                                    m_dqm_nproc_byls_all_paths;
00375   TProfile *                                    m_dqm_nproc_byls_all_endpaths;
00376   TProfile *                                    m_dqm_nproc_byls_interpaths;
00377 
00378   // plots vs. instantaneous luminosity
00379   TProfile *                                    m_dqm_byluminosity_event;
00380   TProfile *                                    m_dqm_byluminosity_presource;
00381   TProfile *                                    m_dqm_byluminosity_source;
00382   TProfile *                                    m_dqm_byluminosity_postsource;
00383   TProfile *                                    m_dqm_byluminosity_all_paths;
00384   TProfile *                                    m_dqm_byluminosity_all_endpaths;
00385   TProfile *                                    m_dqm_byluminosity_interpaths;
00386 
00387   // plots vs. instantaneous luminosity - summed over nodes with the same number of processes
00388   TProfile *                                    m_dqm_nproc_byluminosity_event;
00389   TProfile *                                    m_dqm_nproc_byluminosity_presource;
00390   TProfile *                                    m_dqm_nproc_byluminosity_source;
00391   TProfile *                                    m_dqm_nproc_byluminosity_postsource;
00392   TProfile *                                    m_dqm_nproc_byluminosity_all_paths;
00393   TProfile *                                    m_dqm_nproc_byluminosity_all_endpaths;
00394   TProfile *                                    m_dqm_nproc_byluminosity_interpaths;
00395 
00396   // per-path, per-module and per-module-type accounting
00397   PathInfo *                                    m_current_path;
00398   PathMap<PathInfo>                             m_paths;
00399   std::unordered_map<std::string, ModuleInfo>   m_modules;
00400   std::unordered_map<std::string, ModuleInfo>   m_moduletypes;
00401   ModuleMap<ModuleInfo *>                       m_fast_modules;         // these assume that ModuleDescription are stored in the same object through the whole job,
00402   ModuleMap<ModuleInfo *>                       m_fast_moduletypes;     // which is true only *after* the edm::Worker constructors have run
00403 
00404   // timers
00405   std::pair<struct timespec, struct timespec>   m_timer_event;          // track time spent in each event
00406   std::pair<struct timespec, struct timespec>   m_timer_source;         // track time spent in the source
00407   std::pair<struct timespec, struct timespec>   m_timer_paths;          // track time spent in all paths
00408   std::pair<struct timespec, struct timespec>   m_timer_endpaths;       // track time spent in all endpaths
00409   std::pair<struct timespec, struct timespec>   m_timer_path;           // track time spent in each path
00410   std::pair<struct timespec, struct timespec>   m_timer_module;         // track time spent in each module
00411   struct timespec                               m_timer_first_module;   // record the start of the first active module in a path, if any
00412 
00413 #if defined(__APPLE__) || defined (__MACH__)
00414   clock_serv_t m_clock_port;
00415 #endif // defined(__APPLE__) || defined(__MACH__)
00416 
00417   void gettime(struct timespec & stamp) const
00418   {
00419 #if defined(_POSIX_TIMERS) && _POSIX_TIMERS >= 0
00420     clock_gettime(m_timer_id, & stamp);
00421 #else
00422 // special cases which do not support _POSIX_TIMERS
00423 #if defined(__APPLE__) || defined (__MACH__)
00424     mach_timespec_t timespec;
00425     clock_get_time(m_clock_port, &timespec);
00426     stamp.tv_sec  = timespec.tv_sec;
00427     stamp.tv_nsec = timespec.tv_nsec;
00428 #endif // defined(__APPLE__) || defined(__MACH__)
00429 #endif // defined(_POSIX_TIMERS) && _POSIX_TIMERS >= 0
00430   }
00431 
00432   void start(std::pair<struct timespec, struct timespec> & times) const
00433   {
00434     gettime(times.first);
00435   }
00436 
00437   void stop(std::pair<struct timespec, struct timespec> & times) const
00438   {
00439     gettime(times.second);
00440   }
00441 
00442   static
00443   double delta(const struct timespec & first, const struct timespec & second)
00444   {
00445     if (second.tv_nsec > first.tv_nsec)
00446       return (double) (second.tv_sec - first.tv_sec) + (double) (second.tv_nsec - first.tv_nsec) / (double) 1e9;
00447     else
00448       return (double) (second.tv_sec - first.tv_sec) - (double) (first.tv_nsec - second.tv_nsec) / (double) 1e9;
00449   }
00450 
00451   static
00452   double delta(const std::pair<struct timespec, struct timespec> & times)
00453   {
00454     return delta(times.first, times.second);
00455   }
00456 
00457   // associate to a path all the modules it contains
00458   void fillPathMap(std::string const & name, std::vector<std::string> const & modules);
00459 
00460 };
00461 
00462 #endif // ! FastTimerService_h