CMS 3D CMS Logo

InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/task.h"
22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
24 #include <thread>
25 #include <sys/wait.h>
26 #include <sstream>
27 #include <cstring>
28 #include <poll.h>
29 #include <atomic>
30 #include <algorithm>
31 #include <vector>
32 #include <string>
33 #include <array>
34 
35 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
36 // version. This can break our stack trace printer. Avoid this by
37 // invoking the syscall directly.
38 #ifdef __linux__
39 #include <syscall.h>
40 #endif
41 
42 #include "TROOT.h"
43 #include "TError.h"
44 #include "TFile.h"
45 #include "TInterpreter.h"
46 #include "TH1.h"
47 #include "TSystem.h"
48 #include "TUnixSystem.h"
49 #include "TTree.h"
50 #include "TVirtualStreamerInfo.h"
51 
52 #include "TClassTable.h"
53 
54 #include <memory>
55 
56 namespace {
57  // size of static buffer allocated for listing module names following a
58  // stacktrace abort
// Each element of InitRootHandlers::moduleListBuffers_ is a
// std::array<char, moduleBufferSize>, and the strlcpy/strlcat calls in the
// signal handlers below pass this value as their size bound.
59  constexpr std::size_t moduleBufferSize = 128;
60 } // namespace
61 
62 namespace edm {
64  class ParameterSet;
65  class ActivityRegistry;
66 
67  namespace service {
68  class InitRootHandlers : public RootHandlers {
69  friend int cmssw_stacktrace(void*);
70 
71  public:
72  class ThreadTracker : public tbb::task_scheduler_observer {
73  public:
74  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
75 
76  ThreadTracker() : tbb::task_scheduler_observer() { observe(true); }
77  void on_scheduler_entry(bool) override {
78  // ensure thread local has been allocated; not necessary on Linux with
79  // the current cmsRun linkage, but could be an issue if the platform
80  // or linkage leads to "lazy" allocation of the thread local. By
81  // referencing it here we make sure it has been allocated and can be
82  // accessed safely from our signal handler.
84  threadIDs_.insert(pthread_self());
85  }
86  const Container_type& IDs() { return threadIDs_; }
87 
88  private:
89  Container_type threadIDs_;
90  };
91 
92  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
93  ~InitRootHandlers() override;
94 
95  static void fillDescriptions(ConfigurationDescriptions& descriptions);
96  static void stacktraceFromThread();
98  static int stackTracePause() { return stackTracePause_; }
99 
100  static std::vector<std::array<char, moduleBufferSize>> moduleListBuffers_;
101  static std::atomic<std::size_t> nextModule_, doneModules_;
102 
103  private:
104  static char* const* getPstackArgv();
105  void enableWarnings_() override;
107  void willBeUsingThreads() override;
108 
109  void cachePidInfo();
110  static void stacktraceHelperThread();
111 
112  static const int pidStringLength_ = 200;
114  static char* const pstackArgv_[];
115  static int parentToChild_[2];
116  static int childToParent_[2];
117  static std::unique_ptr<std::thread> helperThread_;
119  static int stackTracePause_;
120 
125  std::shared_ptr<const void> sigBusHandler_;
126  std::shared_ptr<const void> sigSegvHandler_;
127  std::shared_ptr<const void> sigIllHandler_;
128  std::shared_ptr<const void> sigTermHandler_;
129  std::shared_ptr<const void> sigAbrtHandler_;
130  };
131 
// Overload selected for an InitRootHandlers pointer; the argument is unused and
// the function unconditionally returns true.
// NOTE(review): presumably consulted by the service framework to treat this
// service as process-wide - confirm against the caller.
132  inline bool isProcessWideService(InitRootHandlers const*) { return true; }
133 
134  } // end of namespace service
135 } // end of namespace edm
136 
137 namespace edm {
138  namespace service {
139  int cmssw_stacktrace(void*);
140  }
141 } // namespace edm
142 
143 namespace {
145 
146  bool s_ignoreEverything = false;
147 
// Reports whether `search` contains at least one of the C-string fragments
// held in `substrs` (plain substring match, first hit wins).
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  const auto occursInSearch = [&search](const char* fragment) -> bool {
    return search.find(fragment) != std::string::npos;
  };
  return std::any_of(substrs.begin(), substrs.end(), occursInSearch);
}
154 
// Message fragments checked (by substring, via find_if_string) against the ROOT
// message text in RootErrorHandlerImpl's "downgrade the severity" step.
155  constexpr std::array<const char* const, 8> in_message{
156  {"no dictionary for class",
157  "already in TClassTable",
158  "matrix not positive definite",
159  "not a TStreamerInfo object",
160  "Problems declaring payload",
161  "Announced number of args different from the real number of argument passed", // Always printed if gDebug>0 - regardless of whether warning message is real.
162  "nbins is <=0 - set to nbins = 1",
163  "nbinsy is <=0 - set to nbinsy = 1"}};
164 
// Location fragments checked (by substring, via find_if_string) against the
// "@SUB="-prefixed location string in RootErrorHandlerImpl's "downgrade the
// severity" step. Because the match is a substring search, the short entry
// "Fit" matches any location containing those three characters.
165  constexpr std::array<const char* const, 6> in_location{{"Fit",
166  "TDecompChol::Solve",
167  "THistPainter::PaintInit",
168  "TUnixSystem::SetDisplay",
169  "TGClient::GetFontByName",
170  "Inverter::Dinv"}};
171 
// Message fragments for the special case in RootErrorHandlerImpl that should not
// be fatal but must still be reported: a match is logged through
// edm::LogError("Root_Error") and flagged as already printed.
172  constexpr std::array<const char* const, 3> in_message_print{{"number of iterations was insufficient",
173  "bad integrand behavior",
174  "integral is divergent, or slowly convergent"}};
175 
176  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
177  bool die = false;
178 
179  // Translate ROOT severity level to MessageLogger severity level
180 
182 
183  if (level >= kFatal) {
185  } else if (level >= kSysError) {
187  } else if (level >= kError) {
189  } else if (level >= kWarning) {
191  }
192 
193  if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
195  }
196 
197  // Adapt C-strings to std::strings
198  // Arrange to report the error location as furnished by Root
199 
200  std::string el_location = "@SUB=?";
201  if (location != nullptr)
202  el_location = std::string("@SUB=") + std::string(location);
203 
204  std::string el_message = "?";
205  if (message != nullptr)
206  el_message = message;
207 
208  // Try to create a meaningful id string using knowledge of ROOT error messages
209  //
210  // id == "ROOT-ClassName" where ClassName is the affected class
211  // else "ROOT/ClassName" where ClassName is the error-declaring class
212  // else "ROOT"
213 
214  std::string el_identifier = "ROOT";
215 
216  std::string precursor("class ");
217  size_t index1 = el_message.find(precursor);
218  if (index1 != std::string::npos) {
219  size_t index2 = index1 + precursor.length();
220  size_t index3 = el_message.find_first_of(" :", index2);
221  if (index3 != std::string::npos) {
222  size_t substrlen = index3 - index2;
223  el_identifier += "-";
224  el_identifier += el_message.substr(index2, substrlen);
225  }
226  } else {
227  index1 = el_location.find("::");
228  if (index1 != std::string::npos) {
229  el_identifier += "/";
230  el_identifier += el_location.substr(0, index1);
231  }
232  }
233 
234  // Intercept some messages and upgrade the severity
235 
236  if ((el_location.find("TBranchElement::Fill") != std::string::npos) &&
237  (el_message.find("fill branch") != std::string::npos) && (el_message.find("address") != std::string::npos) &&
238  (el_message.find("not set") != std::string::npos)) {
240  }
241 
242  if ((el_message.find("Tree branches") != std::string::npos) &&
243  (el_message.find("different numbers of entries") != std::string::npos)) {
245  }
246 
247  // Intercept some messages and downgrade the severity
248 
249  if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
250  (level < kError and (el_location.find("CINTTypedefBuilder::Setup") != std::string::npos) and
251  (el_message.find("possible entries are in use!") != std::string::npos))) {
253  }
254 
255  // These are a special case because we do not want them to
256  // be fatal, but we do want an error to print.
257  bool alreadyPrinted = false;
258  if (find_if_string(el_message, in_message_print)) {
260  edm::LogError("Root_Error") << el_location << el_message;
261  alreadyPrinted = true;
262  }
263 
264  if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
265  // Don't throw if the message is just informational.
266  die = false;
267  } else {
268  die = true;
269  }
270 
271  // Feed the message to the MessageLogger and let it choose to suppress or not.
272 
273  // Root has declared a fatal error. Throw an EDMException unless the
274  // message corresponds to a pending signal. In that case, do not throw
275  // but let the OS deal with the signal in the usual way.
276  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
277  std::ostringstream sstr;
278  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
279  edm::Exception except(edm::errors::FatalRootError, sstr.str());
280  except.addAdditionalInfo(except.message());
281  except.clearMessage();
282  throw except;
283  }
284 
285  // Typically, we get here only for informational messages,
286  // but we leave the other code in just in case we change
287  // the criteria for throwing.
288  if (!alreadyPrinted) {
289  if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
290  edm::LogError("Root_Fatal") << el_location << el_message;
291  } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
292  edm::LogError("Root_Severe") << el_location << el_message;
293  } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
294  edm::LogError("Root_Error") << el_location << el_message;
295  } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
296  edm::LogWarning("Root_Warning") << el_location << el_message;
297  } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
298  edm::LogInfo("Root_Information") << el_location << el_message;
299  }
300  }
301  }
302 
// Callback registered with SetErrorHandler() by the InitRootHandlers constructor
// when ResetRootErrHandler is enabled. The unnamed bool argument is ignored;
// level, location and message are forwarded unchanged to RootErrorHandlerImpl.
303  void RootErrorHandler(int level, bool, char const* location, char const* message) {
304  RootErrorHandlerImpl(level, location, message);
305  }
306 
307  extern "C" {
// Put SIGILL, SIGSEGV, SIGBUS, SIGTERM and SIGABRT back to their default
// disposition (SIG_DFL), in that order.
void set_default_signals() {
  const int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGABRT};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
315 
// Write the complete NUL-terminated string `text` to descriptor `fd`.
// Short writes are continued from where they stopped and EINTR is retried.
// Returns 0 once everything has been written, or -errno for the first
// write() failure other than EINTR. Only strlen() and write() are called.
static int full_write(int fd, const char* text) {
  const char* cursor = text;
  for (size_t remaining = strlen(text); remaining != 0;) {
    const ssize_t rc = write(fd, cursor, remaining);
    if (rc != -1) {
      // partial or full write: advance past what was accepted
      remaining -= rc;
      cursor += rc;
    } else if (errno != EINTR) {
      return -errno;
    }
    // rc == -1 with EINTR: simply try again
  }
  return 0;
}
334 
// Read exactly `len` bytes from `fd` into `inbuf`.
//
//  timeout_s <  0 : no deadline; the descriptor's flags are neither queried nor
//                   modified, and poll() is never called.
//  timeout_s >= 0 : the descriptor is switched to O_NONBLOCK if it was not
//                   already, every read() is preceded by a poll() bounded by the
//                   time left until end_time, and the original flags are
//                   restored on every return path after the switch.
//
// Returns 0 once `len` bytes have been read, -ETIMEDOUT when poll() times out or
// the deadline has already passed, and -errno for any other failure.
//
// NOTE(review): the initializer of end_time is absent from this listing (the
// line between 339 and 341 is missing); presumably "now + timeout_s seconds" -
// confirm against the real source.
// NOTE(review): a read() result of 0 (end of file) is not treated specially:
// `count` is left unchanged and the loop repeats. With timeout_s < 0 nothing
// else terminates the loop in that situation.
335  static int full_read(int fd, char* inbuf, size_t len, int timeout_s = -1) {
336  char* buf = inbuf;
337  size_t count = len;
338  ssize_t complete = 0;
339  std::chrono::time_point<std::chrono::steady_clock> end_time =
341  int flags;
342  if (timeout_s < 0) {
343  flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
344  } else if ((-1 == (flags = fcntl(fd, F_GETFL)))) {
345  return -errno;
346  }
// Only reached with a clear O_NONBLOCK bit when the real flags were fetched above.
347  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
348  if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
349  return -errno;
350  }
351  }
352  while (count) {
353  if (timeout_s >= 0) {
354  struct pollfd poll_info {
355  fd, POLLIN, 0
356  };
357  int ms_remaining =
358  std::chrono::duration_cast<std::chrono::milliseconds>(end_time - std::chrono::steady_clock::now()).count();
359  if (ms_remaining > 0) {
360  int rc = poll(&poll_info, 1, ms_remaining);
361  if (rc <= 0) {
362  if (rc < 0) {
// Interrupted or transient poll() failure: recompute the remaining time and retry.
363  if (errno == EINTR || errno == EAGAIN) {
364  continue;
365  }
366  rc = -errno;
367  } else {
// poll() returned 0: nothing became readable before the deadline.
368  rc = -ETIMEDOUT;
369  }
370  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
371  fcntl(fd, F_SETFL, flags);
372  }
373  return rc;
374  }
// NOTE(review): ms_remaining == 0 matches neither branch, so one more read() is
// attempted without a preceding poll().
375  } else if (ms_remaining < 0) {
376  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
377  fcntl(fd, F_SETFL, flags);
378  }
379  return -ETIMEDOUT;
380  }
381  }
382  complete = read(fd, buf, count);
383  if (complete == -1) {
384  if (errno == EINTR) {
385  continue;
386  } else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
387  continue;
388  } else {
// Save errno first: the fcntl() used to restore the flags may overwrite it.
389  int orig_errno = errno;
390  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
391  fcntl(fd, F_SETFL, flags);
392  }
393  return -orig_errno;
394  }
395  }
396  count -= complete;
397  buf += complete;
398  }
399  if ((flags & O_NONBLOCK) != O_NONBLOCK) {
400  fcntl(fd, F_SETFL, flags);
401  }
402  return 0;
403  }
404 
// Convenience wrapper: writes `text` to file descriptor 2 (standard error) with
// full_write and returns full_write's result (0 or -errno).
405  static int full_cerr_write(const char* text) { return full_write(2, text); }
406 
407 // these signals are only used inside the stacktrace signal handler,
408 // so common signals can be used. They do have to be different, since
409 // we do not set SA_NODEFER, and RESUME must be a signal that will
410 // cause sleep() to return early.
411 #if defined(SIGRTMAX)
412 #define PAUSE_SIGNAL SIGRTMAX
413 #define RESUME_SIGNAL SIGRTMAX - 1
414 #elif defined(SIGINFO) // macOS/BSD
415 #define PAUSE_SIGNAL SIGINFO
416 #define RESUME_SIGNAL SIGALRM
417 #endif
418 
419  // does nothing, here only to interrupt the sleep() in the pause handler
// Installed for RESUME_SIGNAL by sig_dostack_then_abort; all three arguments are
// ignored.
420  void sig_resume_handler(int sig, siginfo_t*, void*) {}
421 
422  // pause a thread so that a (slow) stacktrace will capture the current state
423  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
424  using namespace edm::service;
425 
426 #ifdef RESUME_SIGNAL
427  sigset_t sigset;
428  sigemptyset(&sigset);
429  sigaddset(&sigset, RESUME_SIGNAL);
430  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
431 #endif
432  // sleep is interrupted by a handled delivery of the resume signal
434 
435  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
438  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
439 
440  strlcpy(buff, "\nModule: ", moduleBufferSize);
442  strlcat(buff,
443  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
444  moduleBufferSize);
445  strlcat(buff, ":", moduleBufferSize);
446  strlcat(buff,
447  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
448  moduleBufferSize);
449  } else {
450  strlcat(buff, "none", moduleBufferSize);
451  }
453  }
454  }
455  }
456 
457  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
458  using namespace edm::service;
459 
460  const auto& tids = InitRootHandlers::threadIDs();
461 
462  const auto self = pthread_self();
463 #ifdef PAUSE_SIGNAL
464  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
465  // install the "pause" handler
466  struct sigaction act;
467  act.sa_sigaction = sig_pause_for_stacktrace;
468  act.sa_flags = 0;
469  sigemptyset(&act.sa_mask);
470  sigaction(PAUSE_SIGNAL, &act, nullptr);
471 
472  // unblock pause signal globally, resume is unblocked in the pause handler
473  sigset_t pausesigset;
474  sigemptyset(&pausesigset);
475  sigaddset(&pausesigset, PAUSE_SIGNAL);
476  sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
477 
478  // send a pause signal to all CMSSW/TBB threads other than self
479  for (auto id : tids) {
480  if (self != id) {
481  pthread_kill(id, PAUSE_SIGNAL);
482  }
483  }
484 
485 #ifdef RESUME_SIGNAL
486  // install the "resume" handler
487  act.sa_sigaction = sig_resume_handler;
488  sigaction(RESUME_SIGNAL, &act, nullptr);
489 #endif
490  }
491 #endif
492 
493  const char* signalname = "unknown";
494  switch (sig) {
495  case SIGBUS: {
496  signalname = "bus error";
497  break;
498  }
499  case SIGSEGV: {
500  signalname = "segmentation violation";
501  break;
502  }
503  case SIGILL: {
504  signalname = "illegal instruction";
505  break;
506  }
507  case SIGTERM: {
508  signalname = "external termination request";
509  break;
510  }
511  case SIGABRT: {
512  signalname = "abort signal";
513  break;
514  }
515  default:
516  break;
517  }
518  full_cerr_write("\n\nA fatal system signal has occurred: ");
519  full_cerr_write(signalname);
520  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
521 
523 
524  // resume the signal handlers to store the current module; we are not guaranteed they
525  // will have time to store their modules, so there is a race condition; this could be
526  // avoided by storing the module information before sleeping, a change that may be
527  // made when we're convinced accessing the thread-local current module is safe.
528 #ifdef RESUME_SIGNAL
529  std::size_t notified = 0;
530  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
531  for (auto id : tids) {
532  if (self != id) {
533  if (pthread_kill(id, RESUME_SIGNAL) == 0)
534  ++notified;
535  }
536  }
537  }
538 #endif
539 
540  full_cerr_write("\nCurrent Modules:\n");
541 
542  // Checking tids.count(self) ensures that we only try to access the current module in
543  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
544  // time the thread is registered, so any lazy allocation will have been done at that
545  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
546  // is allocated at exec time, not lazily.
547  if (tids.count(self) > 0) {
548  char buff[moduleBufferSize] = "\nModule: ";
550  strlcat(buff,
551  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(),
552  moduleBufferSize);
553  strlcat(buff, ":", moduleBufferSize);
554  strlcat(buff,
555  edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(),
556  moduleBufferSize);
557  } else {
558  strlcat(buff, "none", moduleBufferSize);
559  }
560  strlcat(buff, " (crashed)", moduleBufferSize);
561  full_cerr_write(buff);
562  } else {
563  full_cerr_write("\nModule: non-CMSSW (crashed)");
564  }
565 
566 #ifdef PAUSE_SIGNAL
567  // wait a short interval for the paused threads to resume and fill in their module
568  // information, then print
569  if (InitRootHandlers::doneModules_.is_lock_free()) {
570  int spincount = 0;
571  timespec t = {0, 1000};
572  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) {
573  nanosleep(&t, nullptr);
574  }
575  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
576  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
577  }
578  }
579 #endif
580 
581  full_cerr_write("\n\nA fatal system signal has occurred: ");
582  full_cerr_write(signalname);
583  full_cerr_write("\n");
584 
585  // For these five known cases, re-raise the signal to get the correct
586  // exit code.
587  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT)) {
588  signal(sig, SIG_DFL);
589  raise(sig);
590  } else {
591  set_default_signals();
592  ::abort();
593  }
594  }
595 
// Minimal handler that replaces sig_dostack_then_abort for SIGBUS, SIGSEGV,
// SIGILL and SIGTERM: the custom deleters of the corresponding sig*Handler_
// members install it through installCustomHandler.
// NOTE(review): those deleters presumably run when the service is destroyed,
// i.e. during shutdown - confirm.
// It prints a short notice to stderr and lets the signal terminate the process
// through its default action; no stack trace is attempted.
596  void sig_abort(int sig, siginfo_t*, void*) {
597  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
598 
599  // re-raise the signal to get the correct exit code
600  signal(sig, SIG_DFL);
601  raise(sig);
602 
603  // shouldn't get here
// Fallback if raise() returned: reset all handled signals to their defaults,
// wait ten seconds, then abort.
604  set_default_signals();
605  ::sleep(10);
606  ::abort();
607  }
608  }
609 } // end of unnamed namespace
610 
611 namespace edm {
612  namespace service {
613 
614  /*
615  * We've run into issues where GDB fails to print the thread which calls clone().
616  * To avoid this problem, we have an alternate approach below where the signal handler
617  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
618  * invocation; we don't care if that thread is missing from the traceback in this case.
619  */
620  static void cmssw_stacktrace_fork();
621 
623  int toParent = childToParent_[1];
624  int fromParent = parentToChild_[0];
625  char buf[2];
626  buf[1] = '\0';
627 
628  while (true) {
629  int result = full_read(fromParent, buf, 1);
630  if (result < 0) {
631  // To avoid a deadlock (this function is NOT re-entrant), reset signals
632  // We never set them back to the CMSSW handler because we assume the parent
633  // thread will abort for us.
634  set_default_signals();
635  close(toParent);
636  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
637  full_cerr_write(strerror(-result));
638  full_cerr_write("\n");
639  ::abort();
640  }
641  if (buf[0] == '1') {
642  set_default_signals();
644  full_write(toParent, buf);
645  } else if (buf[0] == '2') {
646  // We have just finished forking. Reload the file descriptors for thread
647  // communication.
648  close(toParent);
649  close(fromParent);
650  toParent = childToParent_[1];
651  fromParent = parentToChild_[0];
652  } else if (buf[0] == '3') {
653  break;
654  } else {
655  set_default_signals();
656  close(toParent);
657  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
658  full_cerr_write(buf);
659  full_cerr_write("\n");
660  ::abort();
661  }
662  }
663  }
664 
666  int result = full_write(parentToChild_[1], "1");
667  if (result < 0) {
668  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
669  full_cerr_write(strerror(-result));
670  full_cerr_write("\n");
671  return;
672  }
673  char buf[2];
674  buf[1] = '\0';
675  if ((result = full_read(childToParent_[0], buf, 1, 5 * 60)) < 0) {
676  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
677  if (result == -ETIMEDOUT) {
678  full_cerr_write("timed out waiting for GDB to complete.");
679  } else {
680  full_cerr_write(strerror(-result));
681  }
682  full_cerr_write("\n");
683  return;
684  }
685  }
686 
688  char child_stack[4 * 1024];
689  char* child_stack_ptr = child_stack + 4 * 1024;
690  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
691  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
692  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
693  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
694  int pid =
695 #ifdef __linux__
696  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM | CLONE_FS | SIGCHLD, nullptr);
697 #else
698  fork();
699  if (child_stack_ptr) {
700  } // Suppress 'unused variable' warning on non-Linux
701  if (pid == 0) {
703  }
704 #endif
705  if (pid == -1) {
706  full_cerr_write("(Attempt to perform stack dump failed.)\n");
707  } else {
708  int status;
709  if (waitpid(pid, &status, 0) == -1) {
710  full_cerr_write("(Failed to wait on stack dump output.)\n");
711  }
712  if (status) {
713  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
714  }
715  }
716  }
717 
718  int cmssw_stacktrace(void* /*arg*/) {
719  set_default_signals();
720 
722  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
723  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
724  // calls dlsym.
725 #ifdef __linux__
726  syscall(SYS_execve, "/bin/sh", argv, __environ);
727 #else
728  execv("/bin/sh", argv);
729 #endif
730  ::abort();
731  return 1;
732  }
733 
734  static char pstackName[] = "(CMSSW stack trace helper)";
735  static char dashC[] = "-c";
738  int InitRootHandlers::parentToChild_[2] = {-1, -1};
739  int InitRootHandlers::childToParent_[2] = {-1, -1};
740  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
742  std::vector<std::array<char, moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
743  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
745 
747  : RootHandlers(),
748  unloadSigHandler_(pset.getUntrackedParameter<bool>("UnloadRootSigHandler")),
749  resetErrHandler_(pset.getUntrackedParameter<bool>("ResetRootErrHandler")),
750  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
751  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool>("AutoLibraryLoader")) {
752  stackTracePause_ = pset.getUntrackedParameter<int>("StackTracePauseTime");
753 
754  if (unloadSigHandler_) {
755  // Deactivate all the Root signal handlers and restore the system defaults
756  gSystem->ResetSignal(kSigChild);
757  gSystem->ResetSignal(kSigBus);
758  gSystem->ResetSignal(kSigSegmentationViolation);
759  gSystem->ResetSignal(kSigIllegalInstruction);
760  gSystem->ResetSignal(kSigSystem);
761  gSystem->ResetSignal(kSigPipe);
762  gSystem->ResetSignal(kSigAlarm);
763  gSystem->ResetSignal(kSigUrgent);
764  gSystem->ResetSignal(kSigFloatingException);
765  gSystem->ResetSignal(kSigWindowChanged);
766  } else if (pset.getUntrackedParameter<bool>("AbortOnSignal")) {
767  cachePidInfo();
768 
769  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
770  // it requires an TApplication to be instantiated which causes problems
771  gSystem->ResetSignal(kSigBus);
772  gSystem->ResetSignal(kSigSegmentationViolation);
773  gSystem->ResetSignal(kSigIllegalInstruction);
774  installCustomHandler(SIGBUS, sig_dostack_then_abort);
775  sigBusHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGBUS, sig_abort); });
776  installCustomHandler(SIGSEGV, sig_dostack_then_abort);
777  sigSegvHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGSEGV, sig_abort); });
778  installCustomHandler(SIGILL, sig_dostack_then_abort);
779  sigIllHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGILL, sig_abort); });
780  installCustomHandler(SIGTERM, sig_dostack_then_abort);
781  sigTermHandler_ = std::shared_ptr<const void>(nullptr, [](void*) { installCustomHandler(SIGTERM, sig_abort); });
782  installCustomHandler(SIGABRT, sig_dostack_then_abort);
783  sigAbrtHandler_ = std::shared_ptr<const void>(nullptr, [](void*) {
784  signal(SIGABRT, SIG_DFL); // release SIGABRT to default
785  });
786  }
787 
788  iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds) {
789  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
790  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
791  }
792  });
793 
794  if (resetErrHandler_) {
795  // Replace the Root error handler with one that uses the MessageLogger
796  SetErrorHandler(RootErrorHandler);
797  }
798 
799  // Enable automatic Root library loading.
800  if (autoLibraryLoader_) {
801  gInterpreter->SetClassAutoloading(1);
802  }
803 
804  // Set ROOT parameters.
805  TTree::SetMaxTreeSize(kMaxLong64);
806  TH1::AddDirectory(kFALSE);
807  //G__SetCatchException(0);
808 
809  // Set custom streamers
811 
812  // Load the library containing dictionaries for std:: classes, if not already loaded.
813  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int>>))) {
814  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
815  }
816 
817  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
818  if (debugLevel > 0) {
819  gDebug = debugLevel;
820  }
821 
822  // Enable Root implicit multi-threading
823  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
824  if (imt && not ROOT::IsImplicitMTEnabled()) {
825  ROOT::EnableImplicitMT();
826  }
827  }
828 
830  // close all open ROOT files
831  TIter iter(gROOT->GetListOfFiles());
832  TObject* obj = nullptr;
833  while (nullptr != (obj = iter.Next())) {
834  TFile* f = dynamic_cast<TFile*>(obj);
835  if (f) {
836  // We get a new iterator each time,
837  // because closing a file can invalidate the iterator
838  f->Close();
839  iter = TIter(gROOT->GetListOfFiles());
840  }
841  }
842  }
843 
845  //Tell Root we want to be multi-threaded
846  ROOT::EnableThreadSafety();
847 
848  //When threading, also have to keep ROOT from logging all TObjects into a list
849  TObject::SetObjectStat(false);
850 
851  //Have to avoid having Streamers modify themselves after they have been used
852  TVirtualStreamerInfo::Optimize(false);
853  }
854 
857  desc.setComment("Centralized interface to ROOT.");
858  desc.addUntracked<bool>("UnloadRootSigHandler", false)
859  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
860  desc.addUntracked<bool>("ResetRootErrHandler", true)
861  ->setComment(
862  "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
863  desc.addUntracked<bool>("AutoLibraryLoader", true)
864  ->setComment("If True, enables automatic loading of data dictionaries.");
865  desc.addUntracked<bool>("LoadAllDictionaries", false)->setComment("If True, loads all ROOT dictionaries.");
866  desc.addUntracked<bool>("EnableIMT", true)->setComment("If True, calls ROOT::EnableImplicitMT().");
867  desc.addUntracked<bool>("AbortOnSignal", true)
868  ->setComment(
869  "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
870  "attempts to do a clean shutdown.");
871  desc.addUntracked<int>("DebugLevel", 0)->setComment("Sets ROOT's gDebug value.");
872  desc.addUntracked<int>("StackTracePauseTime", 300)
873  ->setComment("Seconds to pause other threads during stack trace.");
874  descriptions.add("InitRootHandlers", desc);
875  }
876 
// Accessor returning the static pstackArgv_ array.
// NOTE(review): presumably this is the argv handed to /bin/sh in
// cmssw_stacktrace; the line defining that `argv` is missing from this listing -
// confirm.
877  char* const* InitRootHandlers::getPstackArgv() { return pstackArgv_; }
878 
880 
882 
884  if (helperThread_) {
885  //Another InitRootHandlers was initialized in this job, possibly
886  // because multiple EventProcessors are being used.
887  //In that case, we are already all setup
888  return;
889  }
890  if (snprintf(pidString_,
891  pidStringLength_ - 1,
892  "date; gdb -quiet -p %d 2>&1 <<EOF |\n"
893  "set width 0\n"
894  "set height 0\n"
895  "set pagination no\n"
896  "thread apply all bt\n"
897  "EOF\n"
898  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
899  getpid()) >= pidStringLength_) {
900  std::ostringstream sstr;
901  sstr << "Unable to pre-allocate stacktrace handler information";
902  edm::Exception except(edm::errors::OtherCMS, sstr.str());
903  throw except;
904  }
905 
906  // These are initialized to -1; harmless to close an invalid FD.
907  // If this is called post-fork, we don't want to be communicating on
908  // these FDs as they are used internally by the parent.
909  close(childToParent_[0]);
910  close(childToParent_[1]);
911  childToParent_[0] = -1;
912  childToParent_[1] = -1;
913  close(parentToChild_[0]);
914  close(parentToChild_[1]);
915  parentToChild_[0] = -1;
916  parentToChild_[1] = -1;
917 
918  if (-1 == pipe2(childToParent_, O_CLOEXEC)) {
919  std::ostringstream sstr;
920  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
921  edm::Exception except(edm::errors::OtherCMS, sstr.str());
922  throw except;
923  }
924 
925  if (-1 == pipe2(parentToChild_, O_CLOEXEC)) {
926  close(childToParent_[0]);
927  close(childToParent_[1]);
928  childToParent_[0] = -1;
929  childToParent_[1] = -1;
930  std::ostringstream sstr;
931  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
932  edm::Exception except(edm::errors::OtherCMS, sstr.str());
933  throw except;
934  }
935 
936  helperThread_.reset(new std::thread(stacktraceHelperThread));
937  helperThread_->detach();
938  }
939 
940  } // end of namespace service
941 } // end of namespace edm
942 
size
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:38
T getUntrackedParameter(std::string const &, T const &) const
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static void cmssw_stacktrace_fork()
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:315
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:98
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:109
static ModuleCallingContext const * getCurrentModuleOnThread()
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:74
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:169
double f[11][100]
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
Definition: TBBSession.h:68
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
static void fillDescriptions(ConfigurationDescriptions &descriptions)
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
fd
Definition: ztee.py:136
#define constexpr