22 #include "tbb/task_scheduler_observer.h"
23 #include "tbb/concurrent_unordered_set.h"
45 #include "TInterpreter.h"
48 #include "TUnixSystem.h"
50 #include "TVirtualStreamerInfo.h"
52 #include "TClassTable.h"
// Capacity, in bytes, of the fixed char buffers in which the signal handlers assemble their
// "\nModule: ..." lines; it is passed as the size limit to strlcpy/strlcat on those buffers.
59 constexpr std::size_t moduleBufferSize = 128;
// When true, the `s_ignoreEverything || el_severity <= s_ignoreWarnings` test in
// RootErrorHandlerImpl succeeds regardless of the message severity.
146 bool s_ignoreEverything =
false;
/**
 * Reports whether `search` contains at least one of the C strings listed in `substrs`.
 *
 * @tparam SIZE    number of entries in the lookup table
 * @param search   text to scan
 * @param substrs  table of NUL-terminated candidate substrings
 * @return true as soon as one candidate occurs inside `search`; false when no candidate matches
 */
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const candidate) -> bool {
    // A null entry can never match, and passing nullptr to std::string::find is undefined behaviour.
    return candidate != nullptr && search.find(candidate) != std::string::npos;
  });
}
// Substrings looked up in the text of a ROOT diagnostic: RootErrorHandlerImpl hands this table to
// find_if_string together with the message text.
constexpr std::array<const char* const, 8> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
}};
// Substrings looked up in the location string of a ROOT diagnostic: RootErrorHandlerImpl hands
// this table to find_if_string together with the location text.
165 constexpr std::array<const char* const, 6> in_location{{
"Fit",
166 "TDecompChol::Solve",
167 "THistPainter::PaintInit",
168 "TUnixSystem::SetDisplay",
169 "TGClient::GetFontByName",
// Substrings looked up in the text of a ROOT diagnostic; when one of them matches,
// RootErrorHandlerImpl marks the message as already printed.
constexpr std::array<const char* const, 3> in_message_print{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
// Handles one ROOT diagnostic described by its severity `level`, the `location` it came from and
// its `message` text: the level is compared against kFatal / kSysError, an identifier is assembled
// from pieces of the message and location, and known message patterns are recognised before the
// diagnostic is reported.
176 void RootErrorHandlerImpl(
int level,
char const*
location,
char const* message) {
// Severity classification, most severe threshold first.
183 if (
level >= kFatal) {
185 }
else if (
level >= kSysError) {
193 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
// A null message pointer leaves el_message untouched.
205 if (message !=
nullptr)
206 el_message = message;
// Extend el_identifier with "-" plus the token that follows `precursor` in the message, where the
// token ends at the next space or colon.
217 size_t index1 = el_message.find(precursor);
218 if (index1 != std::string::npos) {
219 size_t index2 = index1 + precursor.length();
220 size_t index3 = el_message.find_first_of(
" :", index2);
221 if (index3 != std::string::npos) {
222 size_t substrlen = index3 - index2;
223 el_identifier +=
"-";
224 el_identifier += el_message.substr(index2, substrlen);
// Extend el_identifier with "/" plus the part of the location that precedes the first "::".
227 index1 = el_location.find(
"::");
228 if (index1 != std::string::npos) {
229 el_identifier +=
"/";
230 el_identifier += el_location.substr(0, index1);
// Recognise the TBranchElement::Fill message that mentions "fill branch", "address" and "not set".
236 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
237 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
238 (el_message.find(
"not set") != std::string::npos)) {
// Recognise the "Tree branches ... different numbers of entries" message.
242 if ((el_message.find(
"Tree branches") != std::string::npos) &&
243 (el_message.find(
"different numbers of entries") != std::string::npos)) {
// Matches either lookup table, or a below-kError CINTTypedefBuilder::Setup message containing
// "possible entries are in use!".
249 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
250 (
level <
kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
251 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// Messages listed in in_message_print are flagged as already printed.
257 bool alreadyPrinted =
false;
258 if (find_if_string(el_message, in_message_print)) {
261 alreadyPrinted =
true;
// The "Fatal Root Error" text is built only when `die` is set and the location is not exactly
// "@SUB=TUnixSystem::DispatchSignals".
276 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
277 std::ostringstream sstr;
278 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
281 except.clearMessage();
288 if (!alreadyPrinted) {
298 edm::LogInfo(
"Root_Information") << el_location << el_message;
303 void RootErrorHandler(
int level,
bool,
char const*
location,
char const* message) {
// Puts the handlers of the signals listed below back to their default disposition (SIG_DFL).
308 void set_default_signals() {
309 signal(SIGILL, SIG_DFL);
310 signal(SIGSEGV, SIG_DFL);
311 signal(SIGBUS, SIG_DFL);
313 signal(SIGABRT, SIG_DFL);
316 static int full_write(
int fd,
const char*
text) {
323 if (errno == EINTR) {
// Reads from descriptor `fd` into `inbuf` (`len` is the requested size). `timeout_s` defaults to
// -1; the poll()-based wait below is only entered when it is >= 0.
335 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
338 ssize_t complete = 0;
339 std::chrono::time_point<std::chrono::steady_clock> end_time =
// fcntl(F_GETFL) fetches the descriptor's current status flags; -1 signals failure.
344 }
else if ((-1 == (
flags = fcntl(
fd, F_GETFL)))) {
353 if (timeout_s >= 0) {
354 struct pollfd poll_info {
// Wait on the single descriptor for at most the milliseconds still remaining.
359 if (ms_remaining > 0) {
360 int rc = poll(&poll_info, 1, ms_remaining);
// EINTR and EAGAIN from poll() are handled separately from other errors.
363 if (errno == EINTR || errno == EAGAIN) {
375 }
else if (ms_remaining < 0) {
// `complete == -1` marks a failed read; EINTR and EAGAIN/EWOULDBLOCK are distinguished from other
// errno values.
383 if (complete == -1) {
384 if (errno == EINTR) {
386 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
// Keep a copy of errno before later calls can overwrite it.
389 int orig_errno = errno;
405 static int full_cerr_write(
const char*
text) {
return full_write(2,
text); }
// Signal numbers used to pause other threads (PAUSE_SIGNAL) and to let them continue
// (RESUME_SIGNAL) around the stack-trace dump.
// NOTE(review): RESUME_SIGNAL expands to `SIGRTMAX - 1` without parentheses; that is harmless when
// it is used as a whole function argument, but would be safer parenthesised.
411 #if defined(SIGRTMAX)
412 #define PAUSE_SIGNAL SIGRTMAX
413 #define RESUME_SIGNAL SIGRTMAX - 1
414 #elif defined(SIGINFO) // macOS/BSD
415 #define PAUSE_SIGNAL SIGINFO
416 #define RESUME_SIGNAL SIGALRM
// Handler installed for RESUME_SIGNAL. Its body is intentionally empty: it performs no work of its
// own, and all three arguments are ignored.
void sig_resume_handler(int /*sig*/, siginfo_t*, void*) {}
// Handler installed for PAUSE_SIGNAL (see sig_dostack_then_abort). It unblocks RESUME_SIGNAL for
// the calling thread and assembles a "\nModule: ..." line in a buffer of moduleBufferSize bytes.
423 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Build a signal set holding only RESUME_SIGNAL and unblock it in this thread's signal mask.
428 sigemptyset(&sigset);
429 sigaddset(&sigset, RESUME_SIGNAL);
430 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
// strlcpy/strlcat are given moduleBufferSize as the size limit for `buff`.
440 strlcpy(buff,
"\nModule: ", moduleBufferSize);
445 strlcat(buff,
":", moduleBufferSize);
450 strlcat(buff,
"none", moduleBufferSize);
// Signal handler for fatal signals. It installs the pause/resume handlers, signals the threads
// recorded in `tids`, writes the signal name and the stack-trace banner to standard error, reports
// the current modules, and finally restores default signal dispositions.
457 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
462 const auto self = pthread_self();
// Install sig_pause_for_stacktrace as the handler for PAUSE_SIGNAL, with an empty sa_mask.
466 struct sigaction act;
467 act.sa_sigaction = sig_pause_for_stacktrace;
469 sigemptyset(&act.sa_mask);
470 sigaction(PAUSE_SIGNAL, &act,
nullptr);
// Make sure PAUSE_SIGNAL is not blocked.
473 sigset_t pausesigset;
474 sigemptyset(&pausesigset);
475 sigaddset(&pausesigset, PAUSE_SIGNAL);
476 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
// Deliver PAUSE_SIGNAL to thread ids taken from `tids`.
479 for (
auto id : tids) {
481 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse `act` to install the empty sig_resume_handler for RESUME_SIGNAL.
487 act.sa_sigaction = sig_resume_handler;
488 sigaction(RESUME_SIGNAL, &act,
nullptr);
// Human-readable name of the signal; stays "unknown" unless one of the assignments below runs.
493 const char* signalname =
"unknown";
496 signalname =
"bus error";
500 signalname =
"segmentation violation";
504 signalname =
"illegal instruction";
508 signalname =
"external termination request";
512 signalname =
"abort signal";
518 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
519 full_cerr_write(signalname);
520 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Deliver RESUME_SIGNAL to thread ids taken from `tids`; pthread_kill returns 0 on success.
// NOTE(review): `notified` presumably counts the successful deliveries - confirm.
529 std::size_t notified = 0;
531 for (
auto id : tids) {
533 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
540 full_cerr_write(
"\nCurrent Modules:\n");
// When the current thread is one of `tids`, its module line is built in `buff` and tagged
// " (crashed)"; the "non-CMSSW (crashed)" line is presumably the alternative branch - confirm.
547 if (tids.count(
self) > 0) {
548 char buff[moduleBufferSize] =
"\nModule: ";
553 strlcat(buff,
":", moduleBufferSize);
558 strlcat(buff,
"none", moduleBufferSize);
560 strlcat(buff,
" (crashed)", moduleBufferSize);
561 full_cerr_write(buff);
563 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// timespec{0, 1000}: zero seconds and 1000 nanoseconds per nanosleep call.
571 timespec
t = {0, 1000};
573 nanosleep(&
t,
nullptr);
581 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
582 full_cerr_write(signalname);
583 full_cerr_write(
"\n");
// For these five signals the default disposition of `sig` itself is restored.
587 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig ==
SIGTERM) || (sig == SIGABRT)) {
588 signal(sig, SIG_DFL);
591 set_default_signals();
// Signal handler that writes a notice to standard error saying a fatal signal arrived during exit,
// then restores the default disposition of `sig` and of the signals covered by
// set_default_signals().
596 void sig_abort(
int sig, siginfo_t*,
void*) {
597 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
600 signal(sig, SIG_DFL);
604 set_default_signals();
629 int result = full_read(fromParent,
buf, 1);
634 set_default_signals();
636 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
637 full_cerr_write(strerror(-
result));
638 full_cerr_write(
"\n");
642 set_default_signals();
644 full_write(toParent,
buf);
645 }
else if (
buf[0] ==
'2') {
652 }
else if (
buf[0] ==
'3') {
655 set_default_signals();
657 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
658 full_cerr_write(
buf);
659 full_cerr_write(
"\n");
668 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
669 full_cerr_write(strerror(-
result));
670 full_cerr_write(
"\n");
676 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
677 if (
result == -ETIMEDOUT) {
678 full_cerr_write(
"timed out waiting for GDB to complete.");
680 full_cerr_write(strerror(-
result));
682 full_cerr_write(
"\n");
688 char child_stack[4 * 1024];
689 char* child_stack_ptr = child_stack + 4 * 1024;
699 if (child_stack_ptr) {
706 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
709 if (waitpid(pid, &
status, 0) == -1) {
710 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
713 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
719 set_default_signals();
726 syscall(SYS_execve,
"/bin/sh",
argv, __environ);
728 execv(
"/bin/sh",
argv);
748 unloadSigHandler_(
pset.getUntrackedParameter<
bool>(
"UnloadRootSigHandler")),
749 resetErrHandler_(
pset.getUntrackedParameter<
bool>(
"ResetRootErrHandler")),
750 loadAllDictionaries_(
pset.getUntrackedParameter<
bool>(
"LoadAllDictionaries")),
751 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<
bool>(
"AutoLibraryLoader")) {
756 gSystem->ResetSignal(kSigChild);
757 gSystem->ResetSignal(kSigBus);
758 gSystem->ResetSignal(kSigSegmentationViolation);
759 gSystem->ResetSignal(kSigIllegalInstruction);
760 gSystem->ResetSignal(kSigSystem);
761 gSystem->ResetSignal(kSigPipe);
762 gSystem->ResetSignal(kSigAlarm);
763 gSystem->ResetSignal(kSigUrgent);
764 gSystem->ResetSignal(kSigFloatingException);
765 gSystem->ResetSignal(kSigWindowChanged);
766 }
else if (
pset.getUntrackedParameter<
bool>(
"AbortOnSignal")) {
771 gSystem->ResetSignal(kSigBus);
772 gSystem->ResetSignal(kSigSegmentationViolation);
773 gSystem->ResetSignal(kSigIllegalInstruction);
784 signal(SIGABRT, SIG_DFL);
796 SetErrorHandler(RootErrorHandler);
801 gInterpreter->SetClassAutoloading(1);
805 TTree::SetMaxTreeSize(kMaxLong64);
806 TH1::AddDirectory(kFALSE);
813 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int>>))) {
823 bool imt =
pset.getUntrackedParameter<
bool>(
"EnableIMT");
824 if (imt && not ROOT::IsImplicitMTEnabled()) {
825 ROOT::EnableImplicitMT();
831 TIter iter(gROOT->GetListOfFiles());
832 TObject*
obj =
nullptr;
833 while (
nullptr != (
obj = iter.Next())) {
834 TFile*
f = dynamic_cast<TFile*>(
obj);
839 iter = TIter(gROOT->GetListOfFiles());
846 ROOT::EnableThreadSafety();
849 TObject::SetObjectStat(
false);
852 TVirtualStreamerInfo::Optimize(
false);
857 desc.
setComment(
"Centralized interface to ROOT.");
859 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
862 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
864 ->setComment(
"If True, enables automatic loading of data dictionaries.");
865 desc.
addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
866 desc.
addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
869 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
870 "attempts to do a clean shutdown.");
871 desc.
addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
873 ->setComment(
"Seconds to pause other threads during stack trace.");
874 descriptions.
add(
"InitRootHandlers", desc);
892 "date; gdb -quiet -p %d 2>&1 <<EOF |\n"
895 "set pagination no\n"
896 "thread apply all bt\n"
898 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
900 std::ostringstream sstr;
901 sstr <<
"Unable to pre-allocate stacktrace handler information";
919 std::ostringstream sstr;
920 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
930 std::ostringstream sstr;
931 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);