21 #include "tbb/concurrent_unordered_set.h"
23 #include "tbb/task_scheduler_observer.h"
47 #include "TInterpreter.h"
50 #include "TUnixSystem.h"
52 #include "TVirtualStreamerInfo.h"
54 #include "TClassTable.h"
// Capacity, in bytes, of the character buffers that the signal handlers fill
// with the "\nModule: ..." text through strlcpy/strlcat.
constexpr std::size_t moduleBufferSize{128};
// Global switch tested by RootErrorHandlerImpl together with the
// s_ignoreWarnings severity threshold; it starts out disabled.
bool s_ignoreEverything{false};
/**
 * Report whether `search` contains at least one of the fragments listed in `substrs`.
 *
 * @tparam SIZE    number of entries in the lookup table (deduced from the argument)
 * @param search   text to scan
 * @param substrs  fixed-size table of NUL-terminated fragments to look for
 * @return true if any fragment occurs anywhere inside `search`; false otherwise,
 *         including when the table has no entries
 */
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  // any_of expresses "does some entry match" directly, instead of comparing
  // the iterator returned by find_if against end().
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const fragment) -> bool {
    return search.find(fragment) != std::string::npos;
  });
}
// Fragments searched for inside the ROOT message text: RootErrorHandlerImpl
// passes this table to find_if_string(el_message, ...) in the same condition
// that also checks the location against in_location.
constexpr std::array<const char* const, 8> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
}};
167 constexpr std::array<const char* const, 6> in_location{{
"Fit",
168 "TDecompChol::Solve",
169 "THistPainter::PaintInit",
170 "TUnixSystem::SetDisplay",
171 "TGClient::GetFontByName",
// Fragments searched for inside the ROOT message text by RootErrorHandlerImpl
// just before it sets its alreadyPrinted flag.
constexpr std::array<const char* const, 3> in_message_print{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
178 void RootErrorHandlerImpl(
int level,
char const*
location,
char const* message) {
185 if (
level >= kFatal) {
187 }
else if (
level >= kSysError) {
195 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
207 if (message !=
nullptr)
208 el_message = message;
219 size_t index1 = el_message.find(precursor);
220 if (index1 != std::string::npos) {
221 size_t index2 = index1 + precursor.length();
222 size_t index3 = el_message.find_first_of(
" :", index2);
223 if (index3 != std::string::npos) {
224 size_t substrlen = index3 - index2;
225 el_identifier +=
"-";
226 el_identifier += el_message.substr(index2, substrlen);
229 index1 = el_location.find(
"::");
230 if (index1 != std::string::npos) {
231 el_identifier +=
"/";
232 el_identifier += el_location.substr(0, index1);
238 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
239 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
240 (el_message.find(
"not set") != std::string::npos)) {
244 if ((el_message.find(
"Tree branches") != std::string::npos) &&
245 (el_message.find(
"different numbers of entries") != std::string::npos)) {
251 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
252 (
level <
kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
253 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
259 bool alreadyPrinted =
false;
260 if (find_if_string(el_message, in_message_print)) {
263 alreadyPrinted =
true;
278 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
279 std::ostringstream sstr;
280 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
283 except.clearMessage();
290 if (!alreadyPrinted) {
300 edm::LogInfo(
"Root_Information") << el_location << el_message;
305 void RootErrorHandler(
int level,
bool,
char const*
location,
char const* message) {
310 void set_default_signals() {
311 signal(SIGILL, SIG_DFL);
312 signal(SIGSEGV, SIG_DFL);
313 signal(SIGBUS, SIG_DFL);
315 signal(SIGABRT, SIG_DFL);
318 static int full_write(
int fd,
const char*
text) {
325 if (errno == EINTR) {
337 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
340 ssize_t complete = 0;
341 std::chrono::time_point<std::chrono::steady_clock> end_time =
346 }
else if ((-1 == (
flags = fcntl(
fd, F_GETFL)))) {
355 if (timeout_s >= 0) {
356 struct pollfd poll_info {
361 if (ms_remaining > 0) {
362 int rc = poll(&poll_info, 1, ms_remaining);
365 if (errno == EINTR || errno == EAGAIN) {
377 }
else if (ms_remaining < 0) {
385 if (complete == -1) {
386 if (errno == EINTR) {
388 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
391 int orig_errno = errno;
407 static int full_cerr_write(
const char*
text) {
return full_write(2,
text); }
413 #if defined(SIGRTMAX)
// PAUSE_SIGNAL is sent with pthread_kill to the other threads and is handled by
// sig_pause_for_stacktrace; RESUME_SIGNAL is sent to them afterwards and is
// handled by sig_resume_handler.
#define PAUSE_SIGNAL SIGRTMAX
// Parenthesised so the subtraction stays intact wherever the macro expands
// (e.g. next to *, unary minus or another subtraction).
#define RESUME_SIGNAL (SIGRTMAX - 1)
416 #elif defined(SIGINFO) // macOS/BSD
417 #define PAUSE_SIGNAL SIGINFO
418 #define RESUME_SIGNAL SIGALRM
// Deliberately empty handler: sig_dostack_then_abort installs it for
// RESUME_SIGNAL through sigaction, and it performs no work when invoked.
void sig_resume_handler(int sig, siginfo_t*, void*) {
  static_cast<void>(sig);  // nothing to do with the signal number
}
425 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
430 sigemptyset(&sigset);
431 sigaddset(&sigset, RESUME_SIGNAL);
432 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
442 strlcpy(buff,
"\nModule: ", moduleBufferSize);
447 strlcat(buff,
":", moduleBufferSize);
452 strlcat(buff,
"none", moduleBufferSize);
459 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
464 const auto self = pthread_self();
468 struct sigaction act;
469 act.sa_sigaction = sig_pause_for_stacktrace;
471 sigemptyset(&act.sa_mask);
472 sigaction(PAUSE_SIGNAL, &act,
nullptr);
475 sigset_t pausesigset;
476 sigemptyset(&pausesigset);
477 sigaddset(&pausesigset, PAUSE_SIGNAL);
478 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
481 for (
auto id : tids) {
483 pthread_kill(
id, PAUSE_SIGNAL);
489 act.sa_sigaction = sig_resume_handler;
490 sigaction(RESUME_SIGNAL, &act,
nullptr);
495 const char* signalname =
"unknown";
498 signalname =
"bus error";
502 signalname =
"segmentation violation";
506 signalname =
"illegal instruction";
510 signalname =
"external termination request";
514 signalname =
"abort signal";
520 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
521 full_cerr_write(signalname);
522 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
531 std::size_t notified = 0;
533 for (
auto id : tids) {
535 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
542 full_cerr_write(
"\nCurrent Modules:\n");
549 if (tids.count(
self) > 0) {
550 char buff[moduleBufferSize] =
"\nModule: ";
555 strlcat(buff,
":", moduleBufferSize);
560 strlcat(buff,
"none", moduleBufferSize);
562 strlcat(buff,
" (crashed)", moduleBufferSize);
563 full_cerr_write(buff);
565 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
573 timespec
t = {0, 1000};
575 nanosleep(&
t,
nullptr);
583 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
584 full_cerr_write(signalname);
585 full_cerr_write(
"\n");
589 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig ==
SIGTERM) || (sig == SIGABRT)) {
590 signal(sig, SIG_DFL);
593 set_default_signals();
598 void sig_abort(
int sig, siginfo_t*,
void*) {
599 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
602 signal(sig, SIG_DFL);
606 set_default_signals();
631 int result = full_read(fromParent,
buf, 1);
636 set_default_signals();
638 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
639 full_cerr_write(strerror(-
result));
640 full_cerr_write(
"\n");
644 set_default_signals();
646 full_write(toParent,
buf);
647 }
else if (
buf[0] ==
'2') {
654 }
else if (
buf[0] ==
'3') {
657 set_default_signals();
659 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
660 full_cerr_write(
buf);
661 full_cerr_write(
"\n");
670 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
671 full_cerr_write(strerror(-
result));
672 full_cerr_write(
"\n");
678 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
679 if (
result == -ETIMEDOUT) {
680 full_cerr_write(
"timed out waiting for GDB to complete.");
682 full_cerr_write(strerror(-
result));
684 full_cerr_write(
"\n");
690 char child_stack[4 * 1024];
691 char* child_stack_ptr = child_stack + 4 * 1024;
701 if (child_stack_ptr) {
708 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
711 if (waitpid(pid, &
status, 0) == -1) {
712 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
715 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
721 set_default_signals();
728 syscall(SYS_execve,
"/bin/sh",
argv, __environ);
730 execv(
"/bin/sh",
argv);
750 unloadSigHandler_(
pset.getUntrackedParameter<
bool>(
"UnloadRootSigHandler")),
751 resetErrHandler_(
pset.getUntrackedParameter<
bool>(
"ResetRootErrHandler")),
752 loadAllDictionaries_(
pset.getUntrackedParameter<
bool>(
"LoadAllDictionaries")),
753 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<
bool>(
"AutoLibraryLoader")) {
758 gSystem->ResetSignal(kSigChild);
759 gSystem->ResetSignal(kSigBus);
760 gSystem->ResetSignal(kSigSegmentationViolation);
761 gSystem->ResetSignal(kSigIllegalInstruction);
762 gSystem->ResetSignal(kSigSystem);
763 gSystem->ResetSignal(kSigPipe);
764 gSystem->ResetSignal(kSigAlarm);
765 gSystem->ResetSignal(kSigUrgent);
766 gSystem->ResetSignal(kSigFloatingException);
767 gSystem->ResetSignal(kSigWindowChanged);
768 }
else if (
pset.getUntrackedParameter<
bool>(
"AbortOnSignal")) {
773 gSystem->ResetSignal(kSigBus);
774 gSystem->ResetSignal(kSigSegmentationViolation);
775 gSystem->ResetSignal(kSigIllegalInstruction);
786 signal(SIGABRT, SIG_DFL);
798 SetErrorHandler(RootErrorHandler);
803 gInterpreter->SetClassAutoloading(1);
807 TTree::SetMaxTreeSize(kMaxLong64);
808 TH1::AddDirectory(kFALSE);
825 bool imt =
pset.getUntrackedParameter<
bool>(
"EnableIMT");
826 if (imt && not ROOT::IsImplicitMTEnabled()) {
827 ROOT::EnableImplicitMT();
833 TIter iter(gROOT->GetListOfFiles());
834 TObject*
obj =
nullptr;
835 while (
nullptr != (
obj = iter.Next())) {
836 TFile*
f = dynamic_cast<TFile*>(
obj);
841 iter = TIter(gROOT->GetListOfFiles());
848 ROOT::EnableThreadSafety();
851 TObject::SetObjectStat(
false);
854 TVirtualStreamerInfo::Optimize(
false);
859 desc.setComment(
"Centralized interface to ROOT.");
860 desc.addUntracked<
bool>(
"UnloadRootSigHandler",
false)
861 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
862 desc.addUntracked<
bool>(
"ResetRootErrHandler",
true)
864 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
865 desc.addUntracked<
bool>(
"AutoLibraryLoader",
true)
866 ->setComment(
"If True, enables automatic loading of data dictionaries.");
867 desc.addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
868 desc.addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
869 desc.addUntracked<
bool>(
"AbortOnSignal",
true)
871 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
872 "attempts to do a clean shutdown.");
873 desc.addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
874 desc.addUntracked<
int>(
"StackTracePauseTime", 300)
875 ->setComment(
"Seconds to pause other threads during stack trace.");
876 descriptions.
add(
"InitRootHandlers",
desc);
894 "date; gdb -quiet -p %d 2>&1 <<EOF |\n"
897 "set pagination no\n"
898 "thread apply all bt\n"
900 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
902 std::ostringstream sstr;
903 sstr <<
"Unable to pre-allocate stacktrace handler information";
921 std::ostringstream sstr;
922 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
932 std::ostringstream sstr;
933 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);