CMS 3D CMS Logo

InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/task.h"
22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
24 #include <thread>
25 #include <sys/wait.h>
26 #include <sstream>
27 #include <cstring>
28 #include <poll.h>
29 #include <atomic>
30 #include <algorithm>
31 #include <vector>
32 #include <string>
33 #include <array>
34 
35 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
36 // version. This can break our stack trace printer. Avoid this by
37 // invoking the syscall directly.
38 #ifdef __linux__
39 #include <syscall.h>
40 #endif
41 
42 #include "TROOT.h"
43 #include "TError.h"
44 #include "TFile.h"
45 #include "TInterpreter.h"
46 #include "TH1.h"
47 #include "TSystem.h"
48 #include "TUnixSystem.h"
49 #include "TTree.h"
50 #include "TVirtualStreamerInfo.h"
51 
52 #include "TClassTable.h"
53 
54 #include <memory>
55 
namespace {
  // Capacity, in chars, of each fixed-size buffer that holds one
  // "Module: ..." line printed after a stacktrace abort.
  constexpr std::size_t moduleBufferSize{128};
}
61 
62 namespace edm {
64  class ParameterSet;
65  class ActivityRegistry;
66 
67  namespace service {
68  class InitRootHandlers : public RootHandlers {
69 
70  friend int cmssw_stacktrace(void *);
71 
72  public:
73  class ThreadTracker : public tbb::task_scheduler_observer {
74  public:
75  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
76 
77  ThreadTracker() : tbb::task_scheduler_observer() {
78  observe(true);
79  }
80  void on_scheduler_entry(bool) override {
81  // ensure thread local has been allocated; not necessary on Linux with
82  // the current cmsRun linkage, but could be an issue if the platform
83  // or linkage leads to "lazy" allocation of the thread local. By
84  // referencing it here we make sure it has been allocated and can be
85  // accessed safely from our signal handler.
87  threadIDs_.insert(pthread_self());
88  }
89  const Container_type& IDs() { return threadIDs_; }
90 
91  private:
92  Container_type threadIDs_;
93  };
94 
95  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
96  ~InitRootHandlers() override;
97 
98  static void fillDescriptions(ConfigurationDescriptions& descriptions);
99  static void stacktraceFromThread();
101  static int stackTracePause() { return stackTracePause_; }
102 
103  static std::vector<std::array<char,moduleBufferSize>> moduleListBuffers_;
104  static std::atomic<std::size_t> nextModule_, doneModules_;
105  private:
106  static char *const *getPstackArgv();
107  void enableWarnings_() override;
109  void willBeUsingThreads() override;
110 
111  void cachePidInfo();
112  static void stacktraceHelperThread();
113 
114  static const int pidStringLength_ = 200;
116  static char * const pstackArgv_[];
117  static int parentToChild_[2];
118  static int childToParent_[2];
119  static std::unique_ptr<std::thread> helperThread_;
121  static int stackTracePause_;
122 
127  std::shared_ptr<const void> sigBusHandler_;
128  std::shared_ptr<const void> sigSegvHandler_;
129  std::shared_ptr<const void> sigIllHandler_;
130  std::shared_ptr<const void> sigTermHandler_;
131  std::shared_ptr<const void> sigAbrtHandler_;
132  };
133 
134  inline
136  return true;
137  }
138 
139  } // end of namespace service
140 } // end of namespace edm
141 
142 namespace edm {
143  namespace service {
144  int cmssw_stacktrace(void *);
145  }
146 }
147 
148 namespace {
150 
151  bool s_ignoreEverything = false;
152 
// Returns true when at least one of the C-string fragments in `substrs`
// occurs anywhere inside `search`; returns false otherwise.
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  for (const char* const fragment : substrs) {
    if (search.find(fragment) != std::string::npos) {
      return true;
    }
  }
  return false;
}
157 
// Fragments looked for in the message text of a ROOT diagnostic by
// RootErrorHandlerImpl (through find_if_string) in its
// "downgrade the severity" step.
constexpr std::array<const char* const, 8> in_message = {{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    // Always printed if gDebug>0 - regardless of whether warning message is real.
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
}};
168 
// Fragments looked for in the location string of a ROOT diagnostic by
// RootErrorHandlerImpl (through find_if_string) in its
// "downgrade the severity" step.
constexpr std::array<const char* const, 6> in_location = {{
    "Fit",
    "TDecompChol::Solve",
    "THistPainter::PaintInit",
    "TUnixSystem::SetDisplay",
    "TGClient::GetFontByName",
    "Inverter::Dinv",
}};
177 
// Message fragments RootErrorHandlerImpl treats as a special case: they
// should not be fatal, but an error is still printed for them.
constexpr std::array<const char* const, 3> in_message_print = {{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
183 
184  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
185 
186  bool die = false;
187 
188  // Translate ROOT severity level to MessageLogger severity level
189 
191 
192  if (level >= kFatal) {
194  } else if (level >= kSysError) {
196  } else if (level >= kError) {
198  } else if (level >= kWarning) {
200  }
201 
202  if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
204  }
205 
206  // Adapt C-strings to std::strings
207  // Arrange to report the error location as furnished by Root
208 
209  std::string el_location = "@SUB=?";
210  if (location != nullptr) el_location = std::string("@SUB=")+std::string(location);
211 
212  std::string el_message = "?";
213  if (message != nullptr) el_message = message;
214 
215  // Try to create a meaningful id string using knowledge of ROOT error messages
216  //
217  // id == "ROOT-ClassName" where ClassName is the affected class
218  // else "ROOT/ClassName" where ClassName is the error-declaring class
219  // else "ROOT"
220 
221  std::string el_identifier = "ROOT";
222 
223  std::string precursor("class ");
224  size_t index1 = el_message.find(precursor);
225  if (index1 != std::string::npos) {
226  size_t index2 = index1 + precursor.length();
227  size_t index3 = el_message.find_first_of(" :", index2);
228  if (index3 != std::string::npos) {
229  size_t substrlen = index3-index2;
230  el_identifier += "-";
231  el_identifier += el_message.substr(index2,substrlen);
232  }
233  } else {
234  index1 = el_location.find("::");
235  if (index1 != std::string::npos) {
236  el_identifier += "/";
237  el_identifier += el_location.substr(0, index1);
238  }
239  }
240 
241  // Intercept some messages and upgrade the severity
242 
243  if ((el_location.find("TBranchElement::Fill") != std::string::npos)
244  && (el_message.find("fill branch") != std::string::npos)
245  && (el_message.find("address") != std::string::npos)
246  && (el_message.find("not set") != std::string::npos)) {
248  }
249 
250  if ((el_message.find("Tree branches") != std::string::npos)
251  && (el_message.find("different numbers of entries") != std::string::npos)) {
253  }
254 
255 
256  // Intercept some messages and downgrade the severity
257 
258  if (find_if_string(el_message,in_message) ||
259  find_if_string(el_location,in_location) ||
260  (level < kError and (el_location.find("CINTTypedefBuilder::Setup")!= std::string::npos) and (el_message.find("possible entries are in use!") != std::string::npos)))
261  {
263  }
264 
265  // These are a special case because we do not want them to
266  // be fatal, but we do want an error to print.
267  bool alreadyPrinted = false;
268  if (find_if_string(el_message,in_message_print))
269  {
271  edm::LogError("Root_Error") << el_location << el_message;
272  alreadyPrinted = true;
273  }
274 
275  if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
276  // Don't throw if the message is just informational.
277  die = false;
278  } else {
279  die = true;
280  }
281 
282  // Feed the message to the MessageLogger and let it choose to suppress or not.
283 
284  // Root has declared a fatal error. Throw an EDMException unless the
285  // message corresponds to a pending signal. In that case, do not throw
286  // but let the OS deal with the signal in the usual way.
287  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
288  std::ostringstream sstr;
289  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
290  edm::Exception except(edm::errors::FatalRootError, sstr.str());
291  except.addAdditionalInfo(except.message());
292  except.clearMessage();
293  throw except;
294 
295  }
296 
297  // Typically, we get here only for informational messages,
298  // but we leave the other code in just in case we change
299  // the criteria for throwing.
300  if (!alreadyPrinted) {
301  if (el_severity == edm::RootHandlers::SeverityLevel::kFatal) {
302  edm::LogError("Root_Fatal") << el_location << el_message;
303  } else if (el_severity == edm::RootHandlers::SeverityLevel::kSysError) {
304  edm::LogError("Root_Severe") << el_location << el_message;
305  } else if (el_severity == edm::RootHandlers::SeverityLevel::kError) {
306  edm::LogError("Root_Error") << el_location << el_message;
307  } else if (el_severity == edm::RootHandlers::SeverityLevel::kWarning) {
308  edm::LogWarning("Root_Warning") << el_location << el_message ;
309  } else if (el_severity == edm::RootHandlers::SeverityLevel::kInfo) {
310  edm::LogInfo("Root_Information") << el_location << el_message ;
311  }
312  }
313  }
314 
315  void RootErrorHandler(int level, bool, char const* location, char const* message) {
316  RootErrorHandlerImpl(level, location, message);
317  }
318 
319  extern "C" {
// Put SIGILL, SIGSEGV, SIGBUS, SIGTERM and SIGABRT back to their default
// disposition (SIG_DFL), in that order.
void set_default_signals() {
  const int handledSignals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM, SIGABRT};
  for (const int signum : handledSignals) {
    signal(signum, SIG_DFL);
  }
}
327 
// Write the whole NUL-terminated string `text` to descriptor `fd`, calling
// write() again after short writes and after EINTR.
// Returns 0 once everything has been written, or -errno for the first
// write() failure other than EINTR.
static int full_write(int fd, const char *text)
{
  const char *cursor = text;
  for (size_t remaining = strlen(text); remaining != 0; )
  {
    const ssize_t rc = write(fd, cursor, remaining);
    if (rc == -1)
    {
      if (errno != EINTR) {return -errno;}
      continue;  // interrupted before anything was written: try again
    }
    remaining -= rc;
    cursor += rc;
  }
  return 0;
}
346 
// Read exactly `len` bytes from `fd` into `inbuf`.
//
// timeout_s < 0  : no deadline; the descriptor's flags are left untouched and
//                  read() is called directly.
// timeout_s >= 0 : the whole transfer must finish within `timeout_s` seconds.
//                  O_NONBLOCK is set on the descriptor for the duration of the
//                  call (if it was not already set) and the original flags are
//                  put back on every exit path.
//
// Returns 0 on success, -ETIMEDOUT when the deadline passes, -EPIPE when
// end-of-file is reached before `len` bytes arrived (previously this case
// looped forever), or -errno for any other fcntl/poll/read failure.
// EINTR and EAGAIN/EWOULDBLOCK are retried.
static int full_read(int fd, char *inbuf, size_t len, int timeout_s=-1)
{
  char *buf = inbuf;
  size_t count = len;
  const std::chrono::time_point<std::chrono::steady_clock> end_time =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0)
  {
    flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
  }
  else if ((-1 == (flags = fcntl(fd, F_GETFL))))
  {
    return -errno;
  }
  const bool mustRestoreFlags = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (mustRestoreFlags && (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)))
  {
    return -errno;
  }

  // Single exit helper: put the original descriptor flags back (when we
  // changed them) and hand the result code through. The argument is evaluated
  // before fcntl() runs, so a `-errno` passed in is not clobbered by it.
  const auto finish = [fd, flags, mustRestoreFlags](int rc) {
    if (mustRestoreFlags)
    {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (count)
  {
    if (timeout_s >= 0)
    {
      const auto ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>(
          end_time - std::chrono::steady_clock::now()).count();
      if (ms_remaining < 0)
      {
        return finish(-ETIMEDOUT);
      }
      // Always poll, even with 0 ms left: data that is already pending is
      // still picked up, and an empty descriptor times out at once instead of
      // spinning on EAGAIN.
      struct pollfd poll_info{fd, POLLIN, 0};
      const int rc = poll(&poll_info, 1, static_cast<int>(ms_remaining));
      if (rc < 0)
      {
        if (errno == EINTR || errno == EAGAIN) { continue; }
        return finish(-errno);
      }
      if (rc == 0)
      {
        return finish(-ETIMEDOUT);
      }
    }
    const ssize_t complete = read(fd, buf, count);
    if (complete == -1)
    {
      if ((errno == EINTR) || (errno == EAGAIN) || (errno == EWOULDBLOCK)) {continue;}
      return finish(-errno);
    }
    if (complete == 0)
    {
      // End-of-file: the other end is closed and the remaining bytes can
      // never arrive.
      return finish(-EPIPE);
    }
    count -= complete;
    buf += complete;
  }
  return finish(0);
}
425 
426  static int full_cerr_write(const char *text)
427  {
428  return full_write(2, text);
429  }
430 
// these signals are only used inside the stacktrace signal handler,
// so common signals can be used. They do have to be different, since
// we do not set SA_NODEFER, and RESUME must be a signal that will
// cause sleep() to return early.
// RESUME_SIGNAL is parenthesised so the macro expands to a single value
// in any expression it is used in.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX-1)
#elif defined(SIGINFO) // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
442 
// Intentionally empty handler: it exists only so that delivery of the resume
// signal interrupts the sleep() in the pause handler.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {}
445 
446  // pause a thread so that a (slow) stacktrace will capture the current state
447  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
448  using namespace edm::service;
449 
450 #ifdef RESUME_SIGNAL
451  sigset_t sigset;
452  sigemptyset(&sigset);
453  sigaddset(&sigset, RESUME_SIGNAL);
454  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
455 #endif
456  // sleep interrupts on a handled delivery of the resume signal
458 
459  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
462  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
463 
464  strlcpy(buff, "\nModule: ", moduleBufferSize);
466  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
467  strlcat(buff, ":", moduleBufferSize);
468  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
469  } else {
470  strlcat(buff, "none", moduleBufferSize);
471  }
473  }
474  }
475  }
476 
477  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
478  using namespace edm::service;
479 
480  const auto& tids = InitRootHandlers::threadIDs();
481 
482  const auto self = pthread_self();
483 #ifdef PAUSE_SIGNAL
484  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
485  // install the "pause" handler
486  struct sigaction act;
487  act.sa_sigaction = sig_pause_for_stacktrace;
488  act.sa_flags = 0;
489  sigemptyset(&act.sa_mask);
490  sigaction(PAUSE_SIGNAL, &act, nullptr);
491 
492  // unblock pause signal globally, resume is unblocked in the pause handler
493  sigset_t pausesigset;
494  sigemptyset(&pausesigset);
495  sigaddset(&pausesigset, PAUSE_SIGNAL);
496  sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
497 
498  // send a pause signal to all CMSSW/TBB threads other than self
499  for (auto id : tids) {
500  if (self != id) {
501  pthread_kill(id, PAUSE_SIGNAL);
502  }
503  }
504 
505 #ifdef RESUME_SIGNAL
506  // install the "resume" handler
507  act.sa_sigaction = sig_resume_handler;
508  sigaction(RESUME_SIGNAL, &act, nullptr);
509 #endif
510  }
511 #endif
512 
513  const char* signalname = "unknown";
514  switch (sig) {
515  case SIGBUS:
516  {
517  signalname = "bus error";
518  break;
519  }
520  case SIGSEGV:
521  {
522  signalname = "segmentation violation";
523  break;
524  }
525  case SIGILL:
526  {
527  signalname = "illegal instruction";
528  break;
529  }
530  case SIGTERM:
531  {
532  signalname = "external termination request";
533  break;
534  }
535  case SIGABRT:
536  {
537  signalname = "abort signal";
538  break;
539  }
540  default:
541  break;
542  }
543  full_cerr_write("\n\nA fatal system signal has occurred: ");
544  full_cerr_write(signalname);
545  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
546 
548 
549  // resume the signal handlers to store the current module; we are not guaranteed they
550  // will have time to store their modules, so there is a race condition; this could be
551  // avoided by storing the module information before sleeping, a change that may be
552  // made when we're convinced accessing the thread-local current module is safe.
553 #ifdef RESUME_SIGNAL
554  std::size_t notified = 0;
555  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
556  for (auto id : tids) {
557  if (self != id) {
558  if (pthread_kill(id, RESUME_SIGNAL) == 0) ++notified;
559  }
560  }
561  }
562 #endif
563 
564  full_cerr_write("\nCurrent Modules:\n");
565 
566  // Checking tids.count(self) ensures that we only try to access the current module in
567  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
568  // time the thread is registered, so any lazy allocation will have been done at that
569  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
570  // is allocated at exec time, not lazily.
571  if (tids.count(self) > 0) {
572  char buff[moduleBufferSize] = "\nModule: ";
574  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
575  strlcat(buff, ":", moduleBufferSize);
576  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
577  } else {
578  strlcat(buff, "none", moduleBufferSize);
579  }
580  strlcat(buff, " (crashed)", moduleBufferSize);
581  full_cerr_write(buff);
582  } else {
583  full_cerr_write("\nModule: non-CMSSW (crashed)");
584  }
585 
586 #ifdef PAUSE_SIGNAL
587  // wait a short interval for the paused threads to resume and fill in their module
588  // information, then print
589  if (InitRootHandlers::doneModules_.is_lock_free()) {
590  int spincount = 0;
591  timespec t = { 0, 1000 };
592  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) { nanosleep(&t, nullptr); }
593  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
594  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
595  }
596  }
597 #endif
598 
599  full_cerr_write("\n\nA fatal system signal has occurred: ");
600  full_cerr_write(signalname);
601  full_cerr_write("\n");
602 
603  // For these five known cases, re-raise the signal to get the correct
604  // exit code.
605  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT))
606  {
607  signal(sig, SIG_DFL);
608  raise(sig);
609  }
610  else
611  {
612  set_default_signals();
613  ::abort();
614  }
615  }
616 
617  void sig_abort(int sig, siginfo_t*, void*) {
618  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
619 
620  // re-raise the signal to get the correct exit code
621  signal(sig, SIG_DFL);
622  raise(sig);
623 
624  // shouldn't get here
625  set_default_signals();
626  ::sleep(10);
627  ::abort();
628  }
629  }
630 } // end of unnamed namespace
631 
632 namespace edm {
633  namespace service {
634 
635  /*
636  * We've run into issues where GDB fails to print the thread which calls clone().
637  * To avoid this problem, we have an alternate approach below where the signal handler
638  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
639  * invocation; we don't care if that thread is missing from the traceback in this case.
640  */
641  static void cmssw_stacktrace_fork();
642 
644  {
645  int toParent = childToParent_[1];
646  int fromParent = parentToChild_[0];
647  char buf[2]; buf[1] = '\0';
648 
649  while(true)
650  {
651  int result = full_read(fromParent, buf, 1);
652  if (result < 0)
653  {
654  // To avoid a deadlock (this function is NOT re-entrant), reset signals
655  // We never set them back to the CMSSW handler because we assume the parent
656  // thread will abort for us.
657  set_default_signals();
658  close(toParent);
659  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
660  full_cerr_write(strerror(-result));
661  full_cerr_write("\n");
662  ::abort();
663  }
664  if (buf[0] == '1')
665  {
666  set_default_signals();
668  full_write(toParent, buf);
669  }
670  else if (buf[0] == '2')
671  {
672  // We have just finished forking. Reload the file descriptors for thread
673  // communication.
674  close(toParent);
675  close(fromParent);
676  toParent = childToParent_[1];
677  fromParent = parentToChild_[0];
678  }
679  else if (buf[0] == '3')
680  {
681  break;
682  }
683  else
684  {
685  set_default_signals();
686  close(toParent);
687  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
688  full_cerr_write(buf);
689  full_cerr_write("\n");
690  ::abort();
691  }
692  }
693  }
694 
696  {
697  int result = full_write(parentToChild_[1], "1");
698  if (result < 0)
699  {
700  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
701  full_cerr_write(strerror(-result));
702  full_cerr_write("\n");
703  return;
704  }
705  char buf[2]; buf[1] = '\0';
706  if ((result = full_read(childToParent_[0], buf, 1, 5*60)) < 0)
707  {
708  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
709  if (result == -ETIMEDOUT)
710  {
711  full_cerr_write("timed out waiting for GDB to complete.");
712  }
713  else
714  {
715  full_cerr_write(strerror(-result));
716  }
717  full_cerr_write("\n");
718  return;
719  }
720  }
721 
723  {
724  char child_stack[4*1024];
725  char *child_stack_ptr = child_stack + 4*1024;
726  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
727  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
728  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
729  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
730  int pid =
731 #ifdef __linux__
732  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
733 #else
734  fork();
735  if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
736  if (pid == 0) { edm::service::cmssw_stacktrace(nullptr); }
737 #endif
738  if (pid == -1)
739  {
740  full_cerr_write("(Attempt to perform stack dump failed.)\n");
741  }
742  else
743  {
744  int status;
745  if (waitpid(pid, &status, 0) == -1)
746  {
747  full_cerr_write("(Failed to wait on stack dump output.)\n");
748  }
749  if (status)
750  {
751  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
752  }
753  }
754  }
755 
756  int cmssw_stacktrace(void * /*arg*/)
757  {
758  set_default_signals();
759 
761  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
762  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
763  // calls dlsym.
764 #ifdef __linux__
765  syscall(SYS_execve, "/bin/sh", argv, __environ);
766 #else
767  execv("/bin/sh", argv);
768 #endif
769  ::abort();
770  return 1;
771  }
772 
773  static char pstackName[] = "(CMSSW stack trace helper)";
774  static char dashC[] = "-c";
777  int InitRootHandlers::parentToChild_[2] = {-1, -1};
778  int InitRootHandlers::childToParent_[2] = {-1, -1};
779  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
781  std::vector<std::array<char,moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
782  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
784 
785 
787  : RootHandlers(),
788  unloadSigHandler_(pset.getUntrackedParameter<bool> ("UnloadRootSigHandler")),
789  resetErrHandler_(pset.getUntrackedParameter<bool> ("ResetRootErrHandler")),
790  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
791  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool> ("AutoLibraryLoader"))
792  {
793  stackTracePause_ = pset.getUntrackedParameter<int> ("StackTracePauseTime");
794 
795  if(unloadSigHandler_) {
796  // Deactivate all the Root signal handlers and restore the system defaults
797  gSystem->ResetSignal(kSigChild);
798  gSystem->ResetSignal(kSigBus);
799  gSystem->ResetSignal(kSigSegmentationViolation);
800  gSystem->ResetSignal(kSigIllegalInstruction);
801  gSystem->ResetSignal(kSigSystem);
802  gSystem->ResetSignal(kSigPipe);
803  gSystem->ResetSignal(kSigAlarm);
804  gSystem->ResetSignal(kSigUrgent);
805  gSystem->ResetSignal(kSigFloatingException);
806  gSystem->ResetSignal(kSigWindowChanged);
807  } else if(pset.getUntrackedParameter<bool>("AbortOnSignal")){
808  cachePidInfo();
809 
810  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
811  // it requires an TApplication to be instantiated which causes problems
812  gSystem->ResetSignal(kSigBus);
813  gSystem->ResetSignal(kSigSegmentationViolation);
814  gSystem->ResetSignal(kSigIllegalInstruction);
815  installCustomHandler(SIGBUS,sig_dostack_then_abort);
816  sigBusHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
817  installCustomHandler(SIGBUS,sig_abort);
818  });
819  installCustomHandler(SIGSEGV,sig_dostack_then_abort);
820  sigSegvHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
821  installCustomHandler(SIGSEGV,sig_abort);
822  });
823  installCustomHandler(SIGILL,sig_dostack_then_abort);
824  sigIllHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
825  installCustomHandler(SIGILL,sig_abort);
826  });
827  installCustomHandler(SIGTERM,sig_dostack_then_abort);
828  sigTermHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
829  installCustomHandler(SIGTERM,sig_abort);
830  });
831  installCustomHandler(SIGABRT,sig_dostack_then_abort);
832  sigAbrtHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
833  signal(SIGABRT,SIG_DFL); // release SIGABRT to default
834  });
835  }
836 
837  iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds){
838  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
839  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
840  }
841  });
842 
843  if(resetErrHandler_) {
844 
845  // Replace the Root error handler with one that uses the MessageLogger
846  SetErrorHandler(RootErrorHandler);
847  }
848 
849  // Enable automatic Root library loading.
850  if(autoLibraryLoader_) {
851  gInterpreter->SetClassAutoloading(1);
852  }
853 
854  // Set ROOT parameters.
855  TTree::SetMaxTreeSize(kMaxLong64);
856  TH1::AddDirectory(kFALSE);
857  //G__SetCatchException(0);
858 
859  // Set custom streamers
861 
862  // Load the library containing dictionaries for std:: classes, if not already loaded.
863  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int> >))) {
864  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
865  }
866 
867  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
868  if(debugLevel >0) {
869  gDebug = debugLevel;
870  }
871 
872  // Enable Root implicit multi-threading
873  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
874  if (imt && not ROOT::IsImplicitMTEnabled()) {
875  ROOT::EnableImplicitMT();
876  }
877  }
878 
880  // close all open ROOT files
881  TIter iter(gROOT->GetListOfFiles());
882  TObject *obj = nullptr;
883  while(nullptr != (obj = iter.Next())) {
884  TFile* f = dynamic_cast<TFile*>(obj);
885  if(f) {
886  // We get a new iterator each time,
887  // because closing a file can invalidate the iterator
888  f->Close();
889  iter = TIter(gROOT->GetListOfFiles());
890  }
891  }
892  }
893 
895  //Tell Root we want to be multi-threaded
896  ROOT::EnableThreadSafety();
897 
898  //When threading, also have to keep ROOT from logging all TObjects into a list
899  TObject::SetObjectStat(false);
900 
901  //Have to avoid having Streamers modify themselves after they have been used
902  TVirtualStreamerInfo::Optimize(false);
903  }
904 
907  desc.setComment("Centralized interface to ROOT.");
908  desc.addUntracked<bool>("UnloadRootSigHandler", false)
909  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
910  desc.addUntracked<bool>("ResetRootErrHandler", true)
911  ->setComment("If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
912  desc.addUntracked<bool>("AutoLibraryLoader", true)
913  ->setComment("If True, enables automatic loading of data dictionaries.");
914  desc.addUntracked<bool>("LoadAllDictionaries",false)
915  ->setComment("If True, loads all ROOT dictionaries.");
916  desc.addUntracked<bool>("EnableIMT",true)
917  ->setComment("If True, calls ROOT::EnableImplicitMT().");
918  desc.addUntracked<bool>("AbortOnSignal",true)
919  ->setComment("If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
920  desc.addUntracked<int>("DebugLevel",0)
921  ->setComment("Sets ROOT's gDebug value.");
922  desc.addUntracked<int>("StackTracePauseTime", 300)
923  ->setComment("Seconds to pause other threads during stack trace.");
924  descriptions.add("InitRootHandlers", desc);
925  }
926 
927  char *const *
929  return pstackArgv_;
930  }
931 
932  void
934  s_ignoreWarnings = edm::RootHandlers::SeverityLevel::kInfo;
935  }
936 
937  void
939  s_ignoreWarnings = level;
940  }
941 
942  void
944  {
945  if(helperThread_) {
946  //Another InitRootHandlers was initialized in this job, possibly
947  // because multiple EventProcessors are being used.
948  //In that case, we are already all setup
949  return;
950  }
951  if (snprintf(pidString_, pidStringLength_-1, "gdb -quiet -p %d 2>&1 <<EOF |\n"
952  "set width 0\n"
953  "set height 0\n"
954  "set pagination no\n"
955  "thread apply all bt\n"
956  "EOF\n"
957  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >= pidStringLength_)
958  {
959  std::ostringstream sstr;
960  sstr << "Unable to pre-allocate stacktrace handler information";
961  edm::Exception except(edm::errors::OtherCMS, sstr.str());
962  throw except;
963  }
964 
965  // These are initialized to -1; harmless to close an invalid FD.
966  // If this is called post-fork, we don't want to be communicating on
967  // these FDs as they are used internally by the parent.
968  close(childToParent_[0]);
969  close(childToParent_[1]);
970  childToParent_[0] = -1; childToParent_[1] = -1;
971  close(parentToChild_[0]);
972  close(parentToChild_[1]);
973  parentToChild_[0] = -1; parentToChild_[1] = -1;
974 
975  if (-1 == pipe2(childToParent_, O_CLOEXEC))
976  {
977  std::ostringstream sstr;
978  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
979  edm::Exception except(edm::errors::OtherCMS, sstr.str());
980  throw except;
981  }
982 
983  if (-1 == pipe2(parentToChild_, O_CLOEXEC))
984  {
985  close(childToParent_[0]); close(childToParent_[1]);
986  childToParent_[0] = -1; childToParent_[1] = -1;
987  std::ostringstream sstr;
988  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
989  edm::Exception except(edm::errors::OtherCMS, sstr.str());
990  throw except;
991  }
992 
993  helperThread_.reset(new std::thread(stacktraceHelperThread));
994  helperThread_->detach();
995  }
996 
997  } // end of namespace service
998 } // end of namespace edm
999 
1003 
size
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
T getUntrackedParameter(std::string const &, T const &) const
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:117
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
Definition: IOVProxy.cc:314
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:99
static ModuleCallingContext const * getCurrentModuleOnThread()
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
#define constexpr
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:82
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:235
double f[11][100]
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
Definition: TBBSession.h:68
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
static void fillDescriptions(ConfigurationDescriptions &descriptions)
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
def write(self, setup)