CMS 3D CMS Logo

ZombieKillerService.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 //
3 // Package: FWCore/Services
4 // Class : ZombieKillerService
5 //
6 // Implementation:
7 // [Notes on implementation]
8 //
9 // Original Author: Chris Jones
10 // Created: Sat, 22 Mar 2014 16:25:47 GMT
11 //
12 
13 // system include files
14 #include <atomic>
15 #include <thread>
16 #include <mutex>
17 #include <condition_variable>
18 #include <exception>
19 
20 // user include files
25 
26 namespace edm {
28  public:
30 
// Static hook that fills in the configuration description for this service.
31  static void fillDescriptions(ConfigurationDescriptions& descriptions);
32 
33  private:
// Read once in the constructor from the untracked parameter
// "numberOfAllowedFailedChecksInARow".
34  const unsigned int m_checkThreshold;
// Read once in the constructor from the untracked parameter
// "secondsBetweenChecks"; used as the watcher thread's wait timeout and to
// convert a count of checks into seconds for the warning message.
35  const unsigned int m_secsBetweenChecks;
// Background thread created in startThread() and joined in stopThread().
36  std::thread m_watchingThread;
// The watcher thread waits on this with a timeout; stopThread() notifies it.
37  std::condition_variable m_jobDoneCondition;
// Set to true under m_jobDoneMutex by stopThread(); read by the watcher
// thread's wait predicate while it holds the same mutex.
39  bool m_jobDone;
// Progress flag: set to true by notAZombieYet(), tested and then cleared by
// checkForZombie(). Atomic because the callbacks and the watcher thread both
// touch it.
40  std::atomic<bool> m_stillAlive;
// Initialised to 0 in the constructor; multiplied by m_secsBetweenChecks to
// report how long the job has gone without progress.
41  std::atomic<unsigned int> m_numberChecksWhenNotAlive;
42 
// Registered with the ActivityRegistry for every watched signal except
// PostBeginJob and PostEndJob.
43  void notAZombieYet();
// Called by the watcher thread each time its timed wait expires.
44  void checkForZombie();
// Registered for PostBeginJob: creates the watcher thread.
45  void startThread();
// Registered for PostEndJob: signals completion and joins the watcher thread.
46  void stopThread();
47  };
48 } // namespace edm
49 
50 using namespace edm;
51 
// Overload for ZombieKillerService pointers that always returns true; the
// pointer argument is unnamed and never read.
// NOTE(review): presumably picked up by overload resolution in the service
// framework to mark this service as process-wide - confirm against the
// framework headers.
52 inline bool isProcessWideService(ZombieKillerService const*) { return true; }
53 
54 //
55 // constants, enums and typedefs
56 //
57 
58 //
59 // static data member definitions
60 //
61 
62 //
63 // constructors and destructor
64 //
// Member initialisation: both settings are read from iPSet as untracked
// unsigned-int parameters. The remaining state starts as "job not done",
// "still alive", and zero checks counted.
66  : m_checkThreshold(iPSet.getUntrackedParameter<unsigned int>("numberOfAllowedFailedChecksInARow")),
67  m_secsBetweenChecks(iPSet.getUntrackedParameter<unsigned int>("secondsBetweenChecks")),
68  m_jobDone(false),
69  m_stillAlive(true),
70  m_numberChecksWhenNotAlive(0) {
// Watcher lifetime: start the thread after begin-job, stop it after end-job.
71  iRegistry.watchPostBeginJob([this]() { startThread(); });
72  iRegistry.watchPostEndJob([this]() { stopThread(); });
73 
// Every callback registered from here on does exactly one thing: call
// notAZombieYet(). Only the signal (and therefore the ignored argument list)
// differs. First group: source activity for runs, lumis and events.
74  iRegistry.watchPreSourceRun([this](RunIndex) { notAZombieYet(); });
75  iRegistry.watchPostSourceRun([this](RunIndex) { notAZombieYet(); });
76 
77  iRegistry.watchPreSourceLumi([this](LuminosityBlockIndex) { notAZombieYet(); });
78  iRegistry.watchPostSourceLumi([this](LuminosityBlockIndex) { notAZombieYet(); });
79 
80  iRegistry.watchPreSourceEvent([this](StreamID) { notAZombieYet(); });
81  iRegistry.watchPostSourceEvent([this](StreamID) { notAZombieYet(); });
82 
// Second group: per-module signals carrying a StreamContext (or a
// ModuleDescription for end-job), before and after each transition.
83  iRegistry.watchPreModuleBeginStream([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
84  iRegistry.watchPostModuleBeginStream([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
85 
86  iRegistry.watchPreModuleEndStream([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
87  iRegistry.watchPostModuleEndStream([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
88 
89  iRegistry.watchPreModuleEndJob([this](ModuleDescription const&) { notAZombieYet(); });
90  iRegistry.watchPostModuleEndJob([this](ModuleDescription const&) { notAZombieYet(); });
91  iRegistry.watchPreModuleEvent([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
92  iRegistry.watchPostModuleEvent([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
93 
95  [this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
97  [this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
98 
99  iRegistry.watchPreModuleStreamEndRun([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
100  iRegistry.watchPostModuleStreamEndRun([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
101 
103  [this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
105  [this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
106 
107  iRegistry.watchPreModuleStreamEndLumi([this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
109  [this](StreamContext const&, ModuleCallingContext const&) { notAZombieYet(); });
110 
// Third group: per-module signals carrying a GlobalContext.
112  [this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
114  [this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
115 
116  iRegistry.watchPreModuleGlobalEndRun([this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
117  iRegistry.watchPostModuleGlobalEndRun([this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
118 
120  [this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
122  [this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
123 
124  iRegistry.watchPreModuleGlobalEndLumi([this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
126  [this](GlobalContext const&, ModuleCallingContext const&) { notAZombieYet(); });
127 }
128 
129 // ZombieKillerService::ZombieKillerService(const ZombieKillerService& rhs)
130 // {
131 // // do actual copying here;
132 // }
133 
134 //ZombieKillerService::~ZombieKillerService()
135 //{
136 //}
137 
138 //
139 // assignment operators
140 //
141 // const ZombieKillerService& ZombieKillerService::operator=(const ZombieKillerService& rhs)
142 // {
143 // //An exception safe implementation is
144 // ZombieKillerService temp(rhs);
145 // swap(rhs);
146 //
147 // return *this;
148 // }
149 
150 //
151 // member functions
152 //
// Record that progress was observed. checkForZombie() tests this flag and
// clears it again at the end of every check.
155  m_stillAlive = true;
156 }
157 
// The flag is cleared at the bottom of every call, so finding it still false
// here means nothing set it back to true since the previous call.
159  if (not m_stillAlive) {
// Fatal outcome: log the stuck job, then abort the process with
// std::terminate().
162  edm::LogError("JobStuck") << "Too long since the job has last made progress.";
163  std::terminate();
164  } else {
// Non-fatal outcome: report the time without progress, computed as
// (checks counted) x (seconds between checks).
165  edm::LogWarning("JobProgressing") << "It has been " << m_numberChecksWhenNotAlive * m_secsBetweenChecks
166  << " seconds since job seen progressing";
167  }
168  }
// Re-arm: something must set the flag to true again before the next check,
// otherwise that check enters the branch above.
169  m_stillAlive = false;
170 }
171 
// Create the watcher thread. It owns m_jobDoneMutex for its whole lifetime
// except while it is blocked inside wait_for, which releases the lock during
// the wait and re-acquires it before returning.
173  m_watchingThread = std::thread([this]() {
174  std::unique_lock<std::mutex> lock(m_jobDoneMutex);
// The predicate form of wait_for returns the predicate's value: true once
// m_jobDone has been set (the loop ends and the thread exits), false when
// m_secsBetweenChecks seconds passed with m_jobDone still false. Spurious
// wake-ups are absorbed by the predicate and do not trigger a check.
175  while (not m_jobDoneCondition.wait_for(
176  lock, std::chrono::seconds(m_secsBetweenChecks), [this]() -> bool { return m_jobDone; })) {
177  //we timed out
178  checkForZombie();
179  }
180  });
181 }
182 
// Set the completion flag inside its own scope so the mutex is already
// released when the watcher thread is woken.
184  {
185  std::lock_guard<std::mutex> guard(m_jobDoneMutex);
186  m_jobDone = true;
187  }
// Wake the watcher so its wait predicate observes m_jobDone == true.
188  m_jobDoneCondition.notify_all();
// Block until the watcher thread has finished.
// NOTE(review): std::thread::join() throws std::system_error if the thread is
// not joinable - confirm this callback cannot run when the thread was never
// started.
189  m_watchingThread.join();
190 }
191 
// Declare the two untracked unsigned-int settings, using the same parameter
// names the constructor reads, each with a registered value (60 and 3,
// presumably the defaults) and a human-readable comment.
194  desc.addUntracked<unsigned int>("secondsBetweenChecks", 60)
195  ->setComment("Number of seconds to wait between checking if progress has been made.");
196  desc.addUntracked<unsigned int>("numberOfAllowedFailedChecksInARow", 3)
197  ->setComment("Number of allowed checks in a row with no progress.");
// Publish the description under the label "ZombieKillerService".
198  descriptions.add("ZombieKillerService", desc);
199 }
200 
201 //
202 // const member functions
203 //
204 
205 //
206 // static member functions
207 //
208 
edm::ActivityRegistry::watchPreModuleEndStream
void watchPreModuleEndStream(PreModuleEndStream::slot_type const &iSlot)
Definition: ActivityRegistry.h:258
edm::StreamID
Definition: StreamID.h:30
edm::ActivityRegistry::watchPreModuleBeginStream
void watchPreModuleBeginStream(PreModuleBeginStream::slot_type const &iSlot)
Definition: ActivityRegistry.h:244
edm::ZombieKillerService::fillDescriptions
static void fillDescriptions(ConfigurationDescriptions &descriptions)
Definition: ZombieKillerService.cc:192
edm::ZombieKillerService::ZombieKillerService
ZombieKillerService(edm::ParameterSet const &, edm::ActivityRegistry &)
Definition: ZombieKillerService.cc:65
edm::ActivityRegistry::watchPostModuleGlobalEndRun
void watchPostModuleGlobalEndRun(PostModuleGlobalEndRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:807
edm::ZombieKillerService::m_checkThreshold
const unsigned int m_checkThreshold
Definition: ZombieKillerService.cc:34
MessageLogger.h
funct::false
false
Definition: Factorize.h:34
edm::ActivityRegistry::watchPostSourceRun
void watchPostSourceRun(PostSourceRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:212
edm::ActivityRegistry::watchPreSourceLumi
void watchPreSourceLumi(PreSourceLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:194
edm::ZombieKillerService::m_jobDone
bool m_jobDone
Definition: ZombieKillerService.cc:39
edm::ActivityRegistry::watchPostModuleStreamBeginLumi
void watchPostModuleStreamBeginLumi(PostModuleStreamBeginLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:765
edm::ZombieKillerService::checkForZombie
void checkForZombie()
Definition: ZombieKillerService.cc:158
isProcessWideService
bool isProcessWideService(ZombieKillerService const *)
Definition: ZombieKillerService.cc:52
edm
HLT enums.
Definition: AlignableModifier.h:19
edm::ZombieKillerService
Definition: ZombieKillerService.cc:27
edm::ActivityRegistry::watchPreModuleEndJob
void watchPreModuleEndJob(PreModuleEndJob::slot_type const &iSlot)
Definition: ActivityRegistry.h:641
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
DEFINE_FWK_SERVICE
#define DEFINE_FWK_SERVICE(type)
Definition: ServiceMaker.h:105
edm::ZombieKillerService::m_stillAlive
std::atomic< bool > m_stillAlive
Definition: ZombieKillerService.cc:40
edm::ActivityRegistry::watchPostModuleGlobalBeginRun
void watchPostModuleGlobalBeginRun(PostModuleGlobalBeginRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:793
edm::ActivityRegistry::watchPostModuleStreamEndRun
void watchPostModuleStreamEndRun(PostModuleStreamEndRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:751
edm::ActivityRegistry::watchPreModuleStreamBeginRun
void watchPreModuleStreamBeginRun(PreModuleStreamBeginRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:730
edm::ZombieKillerService::startThread
void startThread()
Definition: ZombieKillerService.cc:172
edm::ModuleDescription
Definition: ModuleDescription.h:21
edm::ActivityRegistry::watchPostModuleBeginStream
void watchPostModuleBeginStream(PostModuleBeginStream::slot_type const &iSlot)
Definition: ActivityRegistry.h:251
edm::ActivityRegistry::watchPostSourceEvent
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
Definition: ActivityRegistry.h:188
ActivityRegistry.h
edm::ActivityRegistry::watchPostModuleEndJob
void watchPostModuleEndJob(PostModuleEndJob::slot_type const &iSlot)
Definition: ActivityRegistry.h:647
edm::ActivityRegistry::watchPreModuleGlobalBeginLumi
void watchPreModuleGlobalBeginLumi(PreModuleGlobalBeginLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:814
edm::ActivityRegistry::watchPreModuleStreamEndLumi
void watchPreModuleStreamEndLumi(PreModuleStreamEndLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:772
edm::ActivityRegistry::watchPostEndJob
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
Definition: ActivityRegistry.h:168
edm::LuminosityBlockIndex
Definition: LuminosityBlockIndex.h:33
edm::ConfigurationDescriptions::add
void add(std::string const &label, ParameterSetDescription const &psetDescription)
Definition: ConfigurationDescriptions.cc:57
edm::StreamContext
Definition: StreamContext.h:31
edm::ActivityRegistry::watchPostModuleEndStream
void watchPostModuleEndStream(PostModuleEndStream::slot_type const &iSlot)
Definition: ActivityRegistry.h:265
seconds
double seconds()
edm::ZombieKillerService::notAZombieYet
void notAZombieYet()
Definition: ZombieKillerService.cc:153
edm::ActivityRegistry
Definition: ActivityRegistry.h:132
edm::ActivityRegistry::watchPostBeginJob
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
Definition: ActivityRegistry.h:156
edm::ActivityRegistry::watchPreModuleStreamBeginLumi
void watchPreModuleStreamBeginLumi(PreModuleStreamBeginLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:758
edm::ZombieKillerService::m_jobDoneMutex
std::mutex m_jobDoneMutex
Definition: ZombieKillerService.cc:38
mutex
static boost::mutex mutex
Definition: Proxy.cc:9
edm::ZombieKillerService::m_secsBetweenChecks
const unsigned int m_secsBetweenChecks
Definition: ZombieKillerService.cc:35
edm::ActivityRegistry::watchPreSourceRun
void watchPreSourceRun(PreSourceRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:206
ServiceMaker.h
edm::ConfigurationDescriptions
Definition: ConfigurationDescriptions.h:28
edm::ActivityRegistry::watchPostSourceLumi
void watchPostSourceLumi(PostSourceLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:200
edm::LogWarning
Definition: MessageLogger.h:141
edm::ZombieKillerService::m_watchingThread
std::thread m_watchingThread
Definition: ZombieKillerService.cc:36
funct::true
true
Definition: Factorize.h:173
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:100
edm::ActivityRegistry::watchPreModuleGlobalEndLumi
void watchPreModuleGlobalEndLumi(PreModuleGlobalEndLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:828
edm::GlobalContext
Definition: GlobalContext.h:29
edm::ActivityRegistry::watchPreModuleStreamEndRun
void watchPreModuleStreamEndRun(PreModuleStreamEndRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:744
edm::ParameterSet
Definition: ParameterSet.h:36
edm::LogError
Definition: MessageLogger.h:183
edm::ActivityRegistry::watchPostModuleGlobalEndLumi
void watchPostModuleGlobalEndLumi(PostModuleGlobalEndLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:835
CommonMethods.lock
def lock()
Definition: CommonMethods.py:82
createfilelist.int
int
Definition: createfilelist.py:10
edm::ZombieKillerService::m_numberChecksWhenNotAlive
std::atomic< unsigned int > m_numberChecksWhenNotAlive
Definition: ZombieKillerService.cc:41
edm::ActivityRegistry::watchPreModuleGlobalEndRun
void watchPreModuleGlobalEndRun(PreModuleGlobalEndRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:800
edm::ZombieKillerService::stopThread
void stopThread()
Definition: ZombieKillerService.cc:183
edm::ActivityRegistry::watchPostModuleStreamEndLumi
void watchPostModuleStreamEndLumi(PostModuleStreamEndLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:779
edm::ZombieKillerService::m_jobDoneCondition
std::condition_variable m_jobDoneCondition
Definition: ZombieKillerService.cc:37
edm::ActivityRegistry::watchPostModuleEvent
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
Definition: ActivityRegistry.h:677
edm::ActivityRegistry::watchPreModuleGlobalBeginRun
void watchPreModuleGlobalBeginRun(PreModuleGlobalBeginRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:786
edm::ActivityRegistry::watchPreSourceEvent
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
Definition: ActivityRegistry.h:182
edm::RunIndex
Definition: RunIndex.h:32
edm::ActivityRegistry::watchPostModuleGlobalBeginLumi
void watchPostModuleGlobalBeginLumi(PostModuleGlobalBeginLumi::slot_type const &iSlot)
Definition: ActivityRegistry.h:821
ParameterSet.h
edm::ActivityRegistry::watchPostModuleStreamBeginRun
void watchPostModuleStreamBeginRun(PostModuleStreamBeginRun::slot_type const &iSlot)
Definition: ActivityRegistry.h:737
edm::ActivityRegistry::watchPreModuleEvent
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
Definition: ActivityRegistry.h:671
edm::ModuleCallingContext
Definition: ModuleCallingContext.h:29