CMS 3D CMS Logo

All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
InitRootHandlers.cc
Go to the documentation of this file.
2 
4 
20 
21 #include "tbb/task.h"
22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
24 #include <thread>
25 #include <sys/wait.h>
26 #include <sstream>
27 #include <cstring>
28 #include <poll.h>
29 #include <atomic>
30 
31 // WORKAROUND: At CERN, execv is replaced with a non-async-signal safe
32 // version. This can break our stack trace printer. Avoid this by
33 // invoking the syscall directly.
34 #ifdef __linux__
35 #include <syscall.h>
36 #endif
37 
38 #include "TROOT.h"
39 #include "TError.h"
40 #include "TFile.h"
41 #include "TInterpreter.h"
42 #include "TH1.h"
43 #include "TSystem.h"
44 #include "TUnixSystem.h"
45 #include "TTree.h"
46 #include "TVirtualStreamerInfo.h"
47 
48 #include "TClassTable.h"
49 
50 #include <memory>
51 
namespace {
  /// Capacity, in bytes, of each static buffer used to hold one module
  /// name when modules are listed after a stacktrace abort.
  constexpr std::size_t moduleBufferSize{128};
}  // unnamed namespace
57 
58 namespace edm {
60  class ParameterSet;
61  class ActivityRegistry;
62 
63  namespace service {
64  class InitRootHandlers : public RootHandlers {
65 
66  friend int cmssw_stacktrace(void *);
67 
68  public:
69  class ThreadTracker : public tbb::task_scheduler_observer {
70  public:
71  typedef tbb::concurrent_unordered_set<pthread_t> Container_type;
72 
73  ThreadTracker() : tbb::task_scheduler_observer() {
74  observe(true);
75  }
76  void on_scheduler_entry(bool) override {
77  // ensure thread local has been allocated; not necessary on Linux with
78  // the current cmsRun linkage, but could be an issue if the platform
79  // or linkage leads to "lazy" allocation of the thread local. By
80  // referencing it here we make sure it has been allocated and can be
81  // accessed safely from our signal handler.
83  threadIDs_.insert(pthread_self());
84  }
85  const Container_type& IDs() { return threadIDs_; }
86 
87  private:
88  Container_type threadIDs_;
89  };
90 
91  explicit InitRootHandlers(ParameterSet const& pset, ActivityRegistry& iReg);
92  ~InitRootHandlers() override;
93 
94  static void fillDescriptions(ConfigurationDescriptions& descriptions);
95  static void stacktraceFromThread();
97  static int stackTracePause() { return stackTracePause_; }
98 
99  static std::vector<std::array<char,moduleBufferSize>> moduleListBuffers_;
100  static std::atomic<std::size_t> nextModule_, doneModules_;
101  private:
102  static char *const *getPstackArgv();
103  void enableWarnings_() override;
104  void ignoreWarnings_() override;
105  void willBeUsingThreads() override;
106 
107  void cachePidInfo();
108  static void stacktraceHelperThread();
109 
110  static const int pidStringLength_ = 200;
112  static char * const pstackArgv_[];
113  static int parentToChild_[2];
114  static int childToParent_[2];
115  static std::unique_ptr<std::thread> helperThread_;
117  static int stackTracePause_;
118 
123  std::shared_ptr<const void> sigBusHandler_;
124  std::shared_ptr<const void> sigSegvHandler_;
125  std::shared_ptr<const void> sigIllHandler_;
126  std::shared_ptr<const void> sigTermHandler_;
127  };
128 
129  inline
131  return true;
132  }
133 
134  } // end of namespace service
135 } // end of namespace edm
136 
// Forward declaration of the stack-trace entry point; the definition
// appears further down in this file.
namespace edm { namespace service {
  int cmssw_stacktrace(void *);
} }
142 
143 namespace {
// Severity categories assigned to ROOT messages before they are thrown or
// logged. Enumerators are listed from least (kInfo) to most (kFatal) severe.
enum class SeverityLevel { kInfo, kWarning, kError, kSysError, kFatal };
151 
// Per-thread switch: while true, ROOT messages at warning level are
// classified as informational by the error handler.
thread_local bool s_ignoreWarnings{false};

// Switch shared by all threads: while true, every ROOT message is
// classified as informational by the error handler.
bool s_ignoreEverything{false};
155 
156  void RootErrorHandlerImpl(int level, char const* location, char const* message) {
157 
158  bool die = false;
159 
160  // Translate ROOT severity level to MessageLogger severity level
161 
162  SeverityLevel el_severity = SeverityLevel::kInfo;
163 
164  if (level >= kFatal) {
165  el_severity = SeverityLevel::kFatal;
166  } else if (level >= kSysError) {
167  el_severity = SeverityLevel::kSysError;
168  } else if (level >= kError) {
169  el_severity = SeverityLevel::kError;
170  } else if (level >= kWarning) {
171  el_severity = s_ignoreWarnings ? SeverityLevel::kInfo : SeverityLevel::kWarning;
172  }
173 
174  if(s_ignoreEverything) {
175  el_severity = SeverityLevel::kInfo;
176  }
177 
178  // Adapt C-strings to std::strings
179  // Arrange to report the error location as furnished by Root
180 
181  std::string el_location = "@SUB=?";
182  if (location != nullptr) el_location = std::string("@SUB=")+std::string(location);
183 
184  std::string el_message = "?";
185  if (message != nullptr) el_message = message;
186 
187  // Try to create a meaningful id string using knowledge of ROOT error messages
188  //
189  // id == "ROOT-ClassName" where ClassName is the affected class
190  // else "ROOT/ClassName" where ClassName is the error-declaring class
191  // else "ROOT"
192 
193  std::string el_identifier = "ROOT";
194 
195  std::string precursor("class ");
196  size_t index1 = el_message.find(precursor);
197  if (index1 != std::string::npos) {
198  size_t index2 = index1 + precursor.length();
199  size_t index3 = el_message.find_first_of(" :", index2);
200  if (index3 != std::string::npos) {
201  size_t substrlen = index3-index2;
202  el_identifier += "-";
203  el_identifier += el_message.substr(index2,substrlen);
204  }
205  } else {
206  index1 = el_location.find("::");
207  if (index1 != std::string::npos) {
208  el_identifier += "/";
209  el_identifier += el_location.substr(0, index1);
210  }
211  }
212 
213  // Intercept some messages and upgrade the severity
214 
215  if ((el_location.find("TBranchElement::Fill") != std::string::npos)
216  && (el_message.find("fill branch") != std::string::npos)
217  && (el_message.find("address") != std::string::npos)
218  && (el_message.find("not set") != std::string::npos)) {
219  el_severity = SeverityLevel::kFatal;
220  }
221 
222  if ((el_message.find("Tree branches") != std::string::npos)
223  && (el_message.find("different numbers of entries") != std::string::npos)) {
224  el_severity = SeverityLevel::kFatal;
225  }
226 
227 
228  // Intercept some messages and downgrade the severity
229 
230  if ((el_message.find("no dictionary for class") != std::string::npos) ||
231  (el_message.find("already in TClassTable") != std::string::npos) ||
232  (el_message.find("matrix not positive definite") != std::string::npos) ||
233  (el_message.find("not a TStreamerInfo object") != std::string::npos) ||
234  (el_message.find("Problems declaring payload") != std::string::npos) ||
235  (el_message.find("Announced number of args different from the real number of argument passed") != std::string::npos) || // Always printed if gDebug>0 - regardless of whether warning message is real.
236  (el_location.find("Fit") != std::string::npos) ||
237  (el_location.find("TDecompChol::Solve") != std::string::npos) ||
238  (el_location.find("THistPainter::PaintInit") != std::string::npos) ||
239  (el_location.find("TUnixSystem::SetDisplay") != std::string::npos) ||
240  (el_location.find("TGClient::GetFontByName") != std::string::npos) ||
241  (el_location.find("Inverter::Dinv") != std::string::npos) ||
242  (el_message.find("nbins is <=0 - set to nbins = 1") != std::string::npos) ||
243  (el_message.find("nbinsy is <=0 - set to nbinsy = 1") != std::string::npos) ||
244  (level < kError and
245  (el_location.find("CINTTypedefBuilder::Setup")!= std::string::npos) and
246  (el_message.find("possible entries are in use!") != std::string::npos))) {
247  el_severity = SeverityLevel::kInfo;
248  }
249 
250  if (el_severity == SeverityLevel::kInfo) {
251  // Don't throw if the message is just informational.
252  die = false;
253  } else {
254  die = true;
255  }
256 
257  // Feed the message to the MessageLogger and let it choose to suppress or not.
258 
259  // Root has declared a fatal error. Throw an EDMException unless the
260  // message corresponds to a pending signal. In that case, do not throw
261  // but let the OS deal with the signal in the usual way.
262  if (die && (el_location != std::string("@SUB=TUnixSystem::DispatchSignals"))) {
263  std::ostringstream sstr;
264  sstr << "Fatal Root Error: " << el_location << "\n" << el_message << '\n';
265  edm::Exception except(edm::errors::FatalRootError, sstr.str());
266  except.addAdditionalInfo(except.message());
267  except.clearMessage();
268  throw except;
269 
270  }
271 
272  // Typically, we get here only for informational messages,
273  // but we leave the other code in just in case we change
274  // the criteria for throwing.
275  if (el_severity == SeverityLevel::kFatal) {
276  edm::LogError("Root_Fatal") << el_location << el_message;
277  } else if (el_severity == SeverityLevel::kSysError) {
278  edm::LogError("Root_Severe") << el_location << el_message;
279  } else if (el_severity == SeverityLevel::kError) {
280  edm::LogError("Root_Error") << el_location << el_message;
281  } else if (el_severity == SeverityLevel::kWarning) {
282  edm::LogWarning("Root_Warning") << el_location << el_message ;
283  } else if (el_severity == SeverityLevel::kInfo) {
284  edm::LogInfo("Root_Information") << el_location << el_message ;
285  }
286  }
287 
288  void RootErrorHandler(int level, bool, char const* location, char const* message) {
289  RootErrorHandlerImpl(level, location, message);
290  }
291 
292  extern "C" {
293 
// Write the whole NUL-terminated string `text` to descriptor `fd`,
// retrying after short writes and after interruption by a signal (EINTR).
// Returns 0 on success, or the negated errno of the failing write().
static int full_write(int fd, const char *text)
{
  const char *cursor = text;
  for (size_t remaining = strlen(text); remaining != 0; )
  {
    const ssize_t n = write(fd, cursor, remaining);
    if (n == -1)
    {
      if (errno != EINTR) { return -errno; }
      continue;  // interrupted before anything was written: try again
    }
    remaining -= n;
    cursor += n;
  }
  return 0;
}
312 
// Read exactly `len` bytes from `fd` into `inbuf`.
//
//   timeout_s < 0   no deadline; the descriptor's blocking mode is left
//                   untouched.
//   timeout_s >= 0  overall deadline in seconds. The descriptor is switched
//                   to non-blocking mode for the duration of the call and its
//                   original flags are put back before returning.
//
// Returns 0 once all bytes have been read, -ETIMEDOUT when the deadline
// passes, -EPIPE when end-of-file is reached before `len` bytes arrived
// (previously this case looped forever because read() keeps returning 0),
// or the negated errno of a failing fcntl()/poll()/read().
static int full_read(int fd, char *inbuf, size_t len, int timeout_s=-1)
{
  char *buf = inbuf;
  size_t count = len;
  const std::chrono::time_point<std::chrono::steady_clock> end_time =
      std::chrono::steady_clock::now() + std::chrono::seconds(timeout_s);

  int flags;
  if (timeout_s < 0)
  {
    flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
  }
  else if (-1 == (flags = fcntl(fd, F_GETFL)))
  {
    return -errno;
  }
  const bool must_restore = (flags & O_NONBLOCK) != O_NONBLOCK;
  if (must_restore && -1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))
  {
    return -errno;
  }

  // Single exit path: put the original descriptor flags back, then hand the
  // result through. The result is computed by the caller before fcntl()
  // runs, so errno-derived codes are not disturbed by the restore.
  const auto finish = [fd, flags, must_restore](int rc) {
    if (must_restore)
    {
      fcntl(fd, F_SETFL, flags);
    }
    return rc;
  };

  while (count)
  {
    if (timeout_s >= 0)
    {
      struct pollfd poll_info{fd, POLLIN, 0};
      const int ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>(
          end_time - std::chrono::steady_clock::now()).count();
      if (ms_remaining > 0)
      {
        const int rc = poll(&poll_info, 1, ms_remaining);
        if (rc < 0)
        {
          if (errno == EINTR || errno == EAGAIN) { continue; }
          return finish(-errno);
        }
        if (rc == 0)
        {
          return finish(-ETIMEDOUT);
        }
      }
      else if (ms_remaining < 0)
      {
        return finish(-ETIMEDOUT);
      }
    }
    const ssize_t complete = read(fd, buf, count);
    if (complete == -1)
    {
      if (errno == EINTR) { continue; }
      if (errno == EAGAIN) { continue; }
      if (errno == EWOULDBLOCK) { continue; }
      return finish(-errno);
    }
    if (complete == 0)
    {
      // End-of-file: the other end is closed, so the remaining bytes can
      // never arrive. Report it instead of spinning.
      return finish(-EPIPE);
    }
    count -= complete;
    buf += complete;
  }
  return finish(0);
}
391 
392  static int full_cerr_write(const char *text)
393  {
394  return full_write(2, text);
395  }
396 
// these signals are only used inside the stacktrace signal handler,
// so common signals can be used. They do have to be different, since
// we do not set SA_NODEFER, and RESUME must be a signal that will
// cause sleep() to return early.
// RESUME_SIGNAL is parenthesised so that it expands to a single value
// wherever it is used inside a larger expression.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
#define RESUME_SIGNAL (SIGRTMAX-1)
#elif defined(SIGINFO) // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
408 
// Deliberately empty handler: it exists only so that delivering the resume
// signal interrupts the sleep() in the pause handler.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {
}
411 
412  // pause a thread so that a (slow) stacktrace will capture the current state
413  void sig_pause_for_stacktrace(int sig, siginfo_t*, void*) {
414  using namespace edm::service;
415 
416 #ifdef RESUME_SIGNAL
417  sigset_t sigset;
418  sigemptyset(&sigset);
419  sigaddset(&sigset, RESUME_SIGNAL);
420  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
421 #endif
422  // sleep interrrupts on a handled delivery of the resume signal
424 
425  if (InitRootHandlers::doneModules_.is_lock_free() && InitRootHandlers::nextModule_.is_lock_free()) {
428  char* buff = InitRootHandlers::moduleListBuffers_[i].data();
429 
430  strlcpy(buff, "\nModule: ", moduleBufferSize);
432  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
433  strlcat(buff, ":", moduleBufferSize);
434  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
435  } else {
436  strlcat(buff, "none", moduleBufferSize);
437  }
439  }
440  }
441  }
442 
443  void sig_dostack_then_abort(int sig, siginfo_t*, void*) {
444  using namespace edm::service;
445 
446  const auto& tids = InitRootHandlers::threadIDs();
447 
448  const auto self = pthread_self();
449 #ifdef PAUSE_SIGNAL
450  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
451  // install the "pause" handler
452  struct sigaction act;
453  act.sa_sigaction = sig_pause_for_stacktrace;
454  act.sa_flags = 0;
455  sigemptyset(&act.sa_mask);
456  sigaction(PAUSE_SIGNAL, &act, nullptr);
457 
458  // unblock pause signal globally, resume is unblocked in the pause handler
459  sigset_t pausesigset;
460  sigemptyset(&pausesigset);
461  sigaddset(&pausesigset, PAUSE_SIGNAL);
462  sigprocmask(SIG_UNBLOCK, &pausesigset, nullptr);
463 
464  // send a pause signal to all CMSSW/TBB threads other than self
465  for (auto id : tids) {
466  if (self != id) {
467  pthread_kill(id, PAUSE_SIGNAL);
468  }
469  }
470 
471 #ifdef RESUME_SIGNAL
472  // install the "resume" handler
473  act.sa_sigaction = sig_resume_handler;
474  sigaction(RESUME_SIGNAL, &act, nullptr);
475 #endif
476  }
477 #endif
478 
479  const char* signalname = "unknown";
480  switch (sig) {
481  case SIGBUS:
482  {
483  signalname = "bus error";
484  break;
485  }
486  case SIGSEGV:
487  {
488  signalname = "segmentation violation";
489  break;
490  }
491  case SIGILL:
492  {
493  signalname = "illegal instruction";
494  break;
495  }
496  case SIGTERM:
497  {
498  signalname = "external termination request";
499  break;
500  }
501  default:
502  break;
503  }
504  full_cerr_write("\n\nA fatal system signal has occurred: ");
505  full_cerr_write(signalname);
506  full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n\n");
507 
509 
510  // resume the signal handlers to store the current module; we are not guaranteed they
511  // will have time to store their modules, so there is a race condition; this could be
512  // avoided by storing the module information before sleeping, a change that may be
513  // made when we're convinced accessing the thread-local current module is safe.
514 #ifdef RESUME_SIGNAL
515  std::size_t notified = 0;
516  if (InitRootHandlers::stackTracePause() > 0 && tids.size() > 1) {
517  for (auto id : tids) {
518  if (self != id) {
519  if (pthread_kill(id, RESUME_SIGNAL) == 0) ++notified;
520  }
521  }
522  }
523 #endif
524 
525  full_cerr_write("\nCurrent Modules:\n");
526 
527  // Checking tids.count(self) ensures that we only try to access the current module in
528  // CMSSW/TBB threads. Those threads access the thread-local current module at the same
529  // time the thread is registered, so any lazy allocation will have been done at that
530  // point. Not necessary on Linux with the current cmsRun linkage, as the thread-local
531  // is allocated at exec time, not lazily.
532  if (tids.count(self) > 0) {
533  char buff[moduleBufferSize] = "\nModule: ";
535  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleName().c_str(), moduleBufferSize);
536  strlcat(buff, ":", moduleBufferSize);
537  strlcat(buff, edm::CurrentModuleOnThread::getCurrentModuleOnThread()->moduleDescription()->moduleLabel().c_str(), moduleBufferSize);
538  } else {
539  strlcat(buff, "none", moduleBufferSize);
540  }
541  strlcat(buff, " (crashed)", moduleBufferSize);
542  full_cerr_write(buff);
543  } else {
544  full_cerr_write("\nModule: non-CMSSW (crashed)");
545  }
546 
547 #ifdef PAUSE_SIGNAL
548  // wait a short interval for the paused threads to resume and fill in their module
549  // information, then print
550  if (InitRootHandlers::doneModules_.is_lock_free()) {
551  int spincount = 0;
552  timespec t = { 0, 1000 };
553  while (++spincount < 1000 && InitRootHandlers::doneModules_ < notified) { nanosleep(&t, nullptr); }
554  for (std::size_t i = 0; i < InitRootHandlers::doneModules_; ++i) {
555  full_cerr_write(InitRootHandlers::moduleListBuffers_[i].data());
556  }
557  }
558 #endif
559 
560  full_cerr_write("\n\nA fatal system signal has occurred: ");
561  full_cerr_write(signalname);
562  full_cerr_write("\n");
563 
564  // For these four known cases, re-raise the signal so get the correct
565  // exit code.
566  if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM))
567  {
568  signal(sig, SIG_DFL);
569  raise(sig);
570  }
571  else
572  {
573  ::abort();
574  }
575  }
576 
577  void sig_abort(int sig, siginfo_t*, void*) {
578  full_cerr_write("\n\nFatal system signal has occurred during exit\n");
579 
580  // re-raise the signal to get the correct exit code
581  signal(sig, SIG_DFL);
582  raise(sig);
583 
584  // shouldn't get here
585  ::sleep(10);
586  ::abort();
587  }
588  }
589 
// Restore the default disposition for SIGILL, SIGSEGV, SIGBUS and SIGTERM.
void set_default_signals() {
  const int fatal_signals[] = {SIGILL, SIGSEGV, SIGBUS, SIGTERM};
  for (const int signum : fatal_signals) {
    signal(signum, SIG_DFL);
  }
}
596 
597 } // end of unnamed namespace
598 
599 namespace edm {
600  namespace service {
601 
602  /*
603  * We've run into issues where GDB fails to print the thread which calls clone().
604  * To avoid this problem, we have an alternate approach below where the signal handler
605  * only reads/writes to a dedicated thread via pipes. The helper thread does the clone()
606  * invocation; we don't care if that thread is missing from the traceback in this case.
607  */
608  static void cmssw_stacktrace_fork();
609 
611  {
612  int toParent = childToParent_[1];
613  int fromParent = parentToChild_[0];
614  char buf[2]; buf[1] = '\0';
615 
616  while(true)
617  {
618  int result = full_read(fromParent, buf, 1);
619  if (result < 0)
620  {
621  // To avoid a deadlock (this function is NOT re-entrant), reset signals
622  // We never set them back to the CMSSW handler because we assume the parent
623  // thread will abort for us.
624  set_default_signals();
625  close(toParent);
626  full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
627  full_cerr_write(strerror(-result));
628  full_cerr_write("\n");
629  ::abort();
630  }
631  if (buf[0] == '1')
632  {
633  set_default_signals();
635  full_write(toParent, buf);
636  }
637  else if (buf[0] == '2')
638  {
639  // We have just finished forking. Reload the file descriptors for thread
640  // communication.
641  close(toParent);
642  close(fromParent);
643  toParent = childToParent_[1];
644  fromParent = parentToChild_[0];
645  }
646  else if (buf[0] == '3')
647  {
648  break;
649  }
650  else
651  {
652  set_default_signals();
653  close(toParent);
654  full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
655  full_cerr_write(buf);
656  full_cerr_write("\n");
657  ::abort();
658  }
659  }
660  }
661 
663  {
664  int result = full_write(parentToChild_[1], "1");
665  if (result < 0)
666  {
667  full_cerr_write("\n\nAttempt to request stacktrace failed: ");
668  full_cerr_write(strerror(-result));
669  full_cerr_write("\n");
670  return;
671  }
672  char buf[2]; buf[1] = '\0';
673  if ((result = full_read(childToParent_[0], buf, 1, 5*60)) < 0)
674  {
675  full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
676  if (result == -ETIMEDOUT)
677  {
678  full_cerr_write("timed out waiting for GDB to complete.");
679  }
680  else
681  {
682  full_cerr_write(strerror(-result));
683  }
684  full_cerr_write("\n");
685  return;
686  }
687  }
688 
690  {
691  char child_stack[4*1024];
692  char *child_stack_ptr = child_stack + 4*1024;
693  // On Linux, we currently use jemalloc. This registers pthread_atfork handlers; these
694  // handlers are *not* async-signal safe. Hence, a deadlock is possible if we invoke
695  // fork() from our signal handlers. Accordingly, we use clone (not POSIX, but AS-safe)
696  // as that is closer to the 'raw metal' syscall and avoids pthread_atfork handlers.
697  int pid =
698 #ifdef __linux__
699  clone(edm::service::cmssw_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
700 #else
701  fork();
702  if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
703  if (pid == 0) {edm::service::cmssw_stacktrace(nullptr); ::abort();}
704 #endif
705  if (pid == -1)
706  {
707  full_cerr_write("(Attempt to perform stack dump failed.)\n");
708  }
709  else
710  {
711  int status;
712  if (waitpid(pid, &status, 0) == -1)
713  {
714  full_cerr_write("(Failed to wait on stack dump output.)\n");
715  }
716  if (status)
717  {
718  full_cerr_write("(GDB stack trace failed unexpectedly)\n");
719  }
720  }
721  }
722 
723  int cmssw_stacktrace(void * /*arg*/)
724  {
726  // NOTE: this is NOT async-signal-safe at CERN's lxplus service.
727  // CERN uses LD_PRELOAD to replace execv with a function from libsnoopy which
728  // calls dlsym.
729 #ifdef __linux__
730  syscall(SYS_execve, "/bin/sh", argv, __environ);
731 #else
732  execv("/bin/sh", argv);
733 #endif
734  ::abort();
735  return 1;
736  }
737 
738  static char pstackName[] = "(CMSSW stack trace helper)";
739  static char dashC[] = "-c";
742  int InitRootHandlers::parentToChild_[2] = {-1, -1};
743  int InitRootHandlers::childToParent_[2] = {-1, -1};
744  std::unique_ptr<std::thread> InitRootHandlers::helperThread_;
746  std::vector<std::array<char,moduleBufferSize>> InitRootHandlers::moduleListBuffers_;
747  std::atomic<std::size_t> InitRootHandlers::nextModule_(0), InitRootHandlers::doneModules_(0);
749 
750 
752  : RootHandlers(),
753  unloadSigHandler_(pset.getUntrackedParameter<bool> ("UnloadRootSigHandler")),
754  resetErrHandler_(pset.getUntrackedParameter<bool> ("ResetRootErrHandler")),
755  loadAllDictionaries_(pset.getUntrackedParameter<bool>("LoadAllDictionaries")),
756  autoLibraryLoader_(loadAllDictionaries_ or pset.getUntrackedParameter<bool> ("AutoLibraryLoader"))
757  {
758  stackTracePause_ = pset.getUntrackedParameter<int> ("StackTracePauseTime");
759 
760  if(unloadSigHandler_) {
761  // Deactivate all the Root signal handlers and restore the system defaults
762  gSystem->ResetSignal(kSigChild);
763  gSystem->ResetSignal(kSigBus);
764  gSystem->ResetSignal(kSigSegmentationViolation);
765  gSystem->ResetSignal(kSigIllegalInstruction);
766  gSystem->ResetSignal(kSigSystem);
767  gSystem->ResetSignal(kSigPipe);
768  gSystem->ResetSignal(kSigAlarm);
769  gSystem->ResetSignal(kSigUrgent);
770  gSystem->ResetSignal(kSigFloatingException);
771  gSystem->ResetSignal(kSigWindowChanged);
772  } else if(pset.getUntrackedParameter<bool>("AbortOnSignal")){
773  cachePidInfo();
774 
775  //NOTE: ROOT can also be told to abort on these kinds of problems BUT
776  // it requires an TApplication to be instantiated which causes problems
777  gSystem->ResetSignal(kSigBus);
778  gSystem->ResetSignal(kSigSegmentationViolation);
779  gSystem->ResetSignal(kSigIllegalInstruction);
780  installCustomHandler(SIGBUS,sig_dostack_then_abort);
781  sigBusHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
782  installCustomHandler(SIGBUS,sig_abort);
783  });
784  installCustomHandler(SIGSEGV,sig_dostack_then_abort);
785  sigSegvHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
786  installCustomHandler(SIGSEGV,sig_abort);
787  });
788  installCustomHandler(SIGILL,sig_dostack_then_abort);
789  sigIllHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
790  installCustomHandler(SIGILL,sig_abort);
791  });
792  installCustomHandler(SIGTERM,sig_dostack_then_abort);
793  sigTermHandler_ = std::shared_ptr<const void>(nullptr,[](void*) {
794  installCustomHandler(SIGTERM,sig_abort);
795  });
796  }
797 
798  iReg.watchPreallocate([](edm::service::SystemBounds const& iBounds){
799  if (iBounds.maxNumberOfThreads() > moduleListBuffers_.size()) {
800  moduleListBuffers_.resize(iBounds.maxNumberOfThreads());
801  }
802  });
803 
804  if(resetErrHandler_) {
805 
806  // Replace the Root error handler with one that uses the MessageLogger
807  SetErrorHandler(RootErrorHandler);
808  }
809 
810  // Enable automatic Root library loading.
811  if(autoLibraryLoader_) {
812  gInterpreter->SetClassAutoloading(1);
813  }
814 
815  // Set ROOT parameters.
816  TTree::SetMaxTreeSize(kMaxLong64);
817  TH1::AddDirectory(kFALSE);
818  //G__SetCatchException(0);
819 
820  // Set custom streamers
822 
823  // Load the library containing dictionaries for std:: classes, if not already loaded.
824  if (!hasDictionary(typeid(std::vector<std::vector<unsigned int> >))) {
825  TypeWithDict::byName("std::vector<std::vector<unsigned int> >");
826  }
827 
828  int debugLevel = pset.getUntrackedParameter<int>("DebugLevel");
829  if(debugLevel >0) {
830  gDebug = debugLevel;
831  }
832 
833  // Enable Root implicit multi-threading
834  bool imt = pset.getUntrackedParameter<bool>("EnableIMT");
835  if (imt && not ROOT::IsImplicitMTEnabled()) {
836  ROOT::EnableImplicitMT();
837  }
838  }
839 
841  // close all open ROOT files
842  TIter iter(gROOT->GetListOfFiles());
843  TObject *obj = nullptr;
844  while(nullptr != (obj = iter.Next())) {
845  TFile* f = dynamic_cast<TFile*>(obj);
846  if(f) {
847  // We get a new iterator each time,
848  // because closing a file can invalidate the iterator
849  f->Close();
850  iter = TIter(gROOT->GetListOfFiles());
851  }
852  }
853  }
854 
856  //Tell Root we want to be multi-threaded
857  ROOT::EnableThreadSafety();
858 
859  //When threading, also have to keep ROOT from logging all TObjects into a list
860  TObject::SetObjectStat(false);
861 
862  //Have to avoid having Streamers modify themselves after they have been used
863  TVirtualStreamerInfo::Optimize(false);
864  }
865 
868  desc.setComment("Centralized interface to ROOT.");
869  desc.addUntracked<bool>("UnloadRootSigHandler", false)
870  ->setComment("If True, signals are handled by this service, rather than by ROOT.");
871  desc.addUntracked<bool>("ResetRootErrHandler", true)
872  ->setComment("If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
873  desc.addUntracked<bool>("AutoLibraryLoader", true)
874  ->setComment("If True, enables automatic loading of data dictionaries.");
875  desc.addUntracked<bool>("LoadAllDictionaries",false)
876  ->setComment("If True, loads all ROOT dictionaries.");
877  desc.addUntracked<bool>("EnableIMT",true)
878  ->setComment("If True, calls ROOT::EnableImplicitMT().");
879  desc.addUntracked<bool>("AbortOnSignal",true)
880  ->setComment("If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which attempts to do a clean shutdown.");
881  desc.addUntracked<int>("DebugLevel",0)
882  ->setComment("Sets ROOT's gDebug value.");
883  desc.addUntracked<int>("StackTracePauseTime", 300)
884  ->setComment("Seconds to pause other threads during stack trace.");
885  descriptions.add("InitRootHandlers", desc);
886  }
887 
888  char *const *
890  return pstackArgv_;
891  }
892 
893  void
895  s_ignoreWarnings =false;
896  }
897 
898  void
900  s_ignoreWarnings = true;
901  }
902 
903  void
905  {
906  if(helperThread_) {
907  //Another InitRootHandlers was initialized in this job, possibly
908  // because multiple EventProcessors are being used.
909  //In that case, we are already all setup
910  return;
911  }
912  if (snprintf(pidString_, pidStringLength_-1, "gdb -quiet -p %d 2>&1 <<EOF |\n"
913  "set width 0\n"
914  "set height 0\n"
915  "set pagination no\n"
916  "thread apply all bt\n"
917  "EOF\n"
918  "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'", getpid()) >= pidStringLength_)
919  {
920  std::ostringstream sstr;
921  sstr << "Unable to pre-allocate stacktrace handler information";
922  edm::Exception except(edm::errors::OtherCMS, sstr.str());
923  throw except;
924  }
925 
926  // These are initialized to -1; harmless to close an invalid FD.
927  // If this is called post-fork, we don't want to be communicating on
928  // these FDs as they are used internally by the parent.
929  close(childToParent_[0]);
930  close(childToParent_[1]);
931  childToParent_[0] = -1; childToParent_[1] = -1;
932  close(parentToChild_[0]);
933  close(parentToChild_[1]);
934  parentToChild_[0] = -1; parentToChild_[1] = -1;
935 
936  if (-1 == pipe2(childToParent_, O_CLOEXEC))
937  {
938  std::ostringstream sstr;
939  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
940  edm::Exception except(edm::errors::OtherCMS, sstr.str());
941  throw except;
942  }
943 
944  if (-1 == pipe2(parentToChild_, O_CLOEXEC))
945  {
946  close(childToParent_[0]); close(childToParent_[1]);
947  childToParent_[0] = -1; childToParent_[1] = -1;
948  std::ostringstream sstr;
949  sstr << "Failed to create child-to-parent pipes (errno=" << errno << "): " << strerror(errno);
950  edm::Exception except(edm::errors::OtherCMS, sstr.str());
951  throw except;
952  }
953 
954  helperThread_.reset(new std::thread(stacktraceHelperThread));
955  helperThread_->detach();
956  }
957 
958  } // end of namespace service
959 } // end of namespace edm
960 
964 
size
Write out results.
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
T getUntrackedParameter(std::string const &, T const &) const
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
Definition: ServiceMaker.h:117
double seconds()
void watchPreallocate(Preallocate::slot_type const &iSlot)
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
bool isProcessWideService(TFileService const *)
Definition: TFileService.h:99
static ModuleCallingContext const * getCurrentModuleOnThread()
void setRefCoreStreamer(bool resetAll=false)
std::vector< Variable::Flags > flags
Definition: MVATrainer.cc:135
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
#define constexpr
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
Definition: TypeWithDict.cc:59
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
Definition: Provenance.cc:27
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::shared_ptr< const void > sigIllHandler_
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
Definition: Exception.cc:235
double f[11][100]
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static char pstackName[]
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
Definition: eve_macros.cc:135
static char dashC[]
HLT enums.
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
static void fillDescriptions(ConfigurationDescriptions &descriptions)
SeverityLevel
bool hasDictionary(std::type_info const &)
#define O_NONBLOCK
Definition: SysFile.h:21
def write(self, setup)