21 #include "tbb/concurrent_unordered_set.h"
23 #include "tbb/task_scheduler_observer.h"
24 #include "tbb/global_control.h"
48 #include "TInterpreter.h"
51 #include "TUnixSystem.h"
53 #include "TVirtualStreamerInfo.h"
55 #include "TClassTable.h"
// Size, in bytes, of the fixed character buffers in which the signal handlers
// assemble their "\nModule: ..." lines; it is also the bound passed to the
// strlcpy()/strlcat() calls that fill those buffers.
constexpr std::size_t moduleBufferSize = 128;
// Global switch read by RootErrorHandlerImpl: while it is true, the handler's
// `s_ignoreEverything || el_severity <= s_ignoreWarnings` check is satisfied
// regardless of the severity of the message being handled.
bool s_ignoreEverything = false;
/// Reports whether `search` contains at least one of the C strings in `substrs`.
///
/// @tparam SIZE    number of slots in the lookup table
/// @param search   text to scan
/// @param substrs  fixed-size table of NUL-terminated substrings
/// @return true if any non-null entry of `substrs` occurs somewhere in `search`
///
/// Null entries are skipped instead of being handed to std::string::find: a
/// table declared with more slots than initializers has value-initialized
/// (null) trailing entries, and calling find() with a null pointer is undefined.
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const s) -> bool {
    return s != nullptr && search.find(s) != std::string::npos;
  });
}
// Substrings looked for in the text of a ROOT message; RootErrorHandlerImpl
// passes this table to find_if_string(el_message, in_message).
constexpr std::array<const char* const, 9> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "tbb::global_control is limiting",
}};
// Substrings looked for in the location text of a ROOT message;
// RootErrorHandlerImpl passes this table to
// find_if_string(el_location, in_location).
// NOTE(review): the array is declared with 7 elements but only 6 initializers
// are visible here (the embedded numbering skips from 185 to 187). If an entry
// really is missing, the last slot is value-initialized to a null pointer and
// would be passed to std::string::find by find_if_string - confirm against the
// upstream file and restore the lost entry or shrink the size to 6.
181 constexpr std::array<const char* const, 7> in_location{{
"Fit",
182 "TDecompChol::Solve",
183 "THistPainter::PaintInit",
184 "TUnixSystem::SetDisplay",
185 "TGClient::GetFontByName",
187 "RTaskArenaWrapper"}};
// Message substrings checked by RootErrorHandlerImpl through
// find_if_string(el_message, in_message_print_error); a match is what sets the
// handler's `alreadyPrinted` flag.
constexpr std::array<const char* const, 3> in_message_print_error{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
// Processes one ROOT diagnostic described by `level`, `location` and `message`.
// From the lines visible here it: compares `level` against the kFatal and
// kSysError thresholds, copies a non-null `message` into el_message, extends
// el_identifier with pieces of the message and location, recognises a handful
// of specific message/location patterns, and finally either composes a
// "Fatal Root Error" text (when `die` is set) or logs through edm::LogInfo.
// NOTE(review): many interior lines are absent from this view, so the actions
// taken inside most branches cannot be described from here.
193 void RootErrorHandlerImpl(
int level,
char const*
location,
char const* message) {
// Classify the ROOT level by threshold, most severe first.
200 if (
level >= kFatal) {
202 }
else if (
level >= kSysError) {
// Severity filter: always satisfied while s_ignoreEverything is true.
210 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
// A null `message` leaves el_message as it was.
222 if (message !=
nullptr)
223 el_message = message;
// If `precursor` occurs in the message and is followed (somewhere) by a ' ' or
// ':', append "-" plus the text between the two to el_identifier.
234 size_t index1 = el_message.find(precursor);
235 if (index1 != std::string::npos) {
236 size_t index2 = index1 + precursor.length();
237 size_t index3 = el_message.find_first_of(
" :", index2);
238 if (index3 != std::string::npos) {
239 size_t substrlen = index3 - index2;
240 el_identifier +=
"-";
241 el_identifier += el_message.substr(index2, substrlen);
// If the location contains "::", append "/" plus the part before it.
244 index1 = el_location.find(
"::");
245 if (index1 != std::string::npos) {
246 el_identifier +=
"/";
247 el_identifier += el_location.substr(0, index1);
// Pattern: location mentions TBranchElement::Fill and the message contains
// "fill branch", "address" and "not set".
253 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
254 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
255 (el_message.find(
"not set") != std::string::npos)) {
// Pattern: message contains both "Tree branches" and
// "different numbers of entries".
259 if ((el_message.find(
"Tree branches") != std::string::npos) &&
260 (el_message.find(
"different numbers of entries") != std::string::npos)) {
// Pattern: message matches the in_message table, or location matches the
// in_location table, or a below-kError CINTTypedefBuilder::Setup message says
// "possible entries are in use!".
266 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
267 (
level <
kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
268 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// alreadyPrinted is set when the message matches in_message_print_error and
// later gates the `if (!alreadyPrinted)` block.
274 bool alreadyPrinted =
false;
275 if (find_if_string(el_message, in_message_print_error)) {
278 alreadyPrinted =
true;
// When `die` is set, every location except the exact string
// "@SUB=TUnixSystem::DispatchSignals" gets a "Fatal Root Error" text composed
// from the location and message.
293 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
294 std::ostringstream sstr;
295 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
298 except.clearMessage();
305 if (!alreadyPrinted) {
// Location and message are streamed back to back, with no separator.
315 edm::LogInfo(
"Root_Information") << el_location << el_message;
// Error-handler callback with ROOT's (level, bool, location, message)
// signature; the service registers it with SetErrorHandler(RootErrorHandler).
// The bool parameter is unnamed and therefore unused.
// NOTE(review): the body is not visible here; presumably it forwards
// level/location/message to RootErrorHandlerImpl - confirm.
320 void RootErrorHandler(
int level,
bool,
char const*
location,
char const* message) {
// Puts the listed fatal signals back to their default disposition (SIG_DFL).
// NOTE(review): SIGTERM is not reset in the lines visible here (the embedded
// numbering skips 329), although sig_dostack_then_abort handles SIGTERM
// together with these signals - confirm it was not lost.
325 void set_default_signals() {
326 signal(SIGILL, SIG_DFL);
327 signal(SIGSEGV, SIG_DFL);
328 signal(SIGBUS, SIG_DFL);
330 signal(SIGABRT, SIG_DFL);
// Writes the C string `text` to file descriptor `fd`; full_cerr_write() calls
// it with fd 2.
// NOTE(review): only the EINTR check is visible; presumably an interrupted
// write is retried rather than reported - confirm against the full body, and
// confirm the meaning of the int return value.
333 static int full_write(
int fd,
const char*
text) {
340 if (errno == EINTR) {
// Reads from `fd` into `inbuf` (`len` is the requested size), optionally
// bounded by a timeout.
//   timeout_s  seconds to wait; the default -1 (any negative value) skips the
//              poll()-based wait, which is only entered when timeout_s >= 0.
// Callers in this file pass the result to strerror(-result) and compare it with
// -ETIMEDOUT, i.e. they treat a negative return as a negated errno value.
// NOTE(review): most of the body is absent from this view (including the
// initializer of end_time), so the success return value and the exact retry
// policy cannot be confirmed from here.
352 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
355 ssize_t complete = 0;
356 std::chrono::time_point<std::chrono::steady_clock> end_time =
361 }
else if ((-1 == (
flags = fcntl(
fd, F_GETFL)))) {
// Timed path: wait for the descriptor with poll() while time remains.
370 if (timeout_s >= 0) {
371 struct pollfd poll_info {
376 if (ms_remaining > 0) {
377 int rc = poll(&poll_info, 1, ms_remaining);
380 if (errno == EINTR || errno == EAGAIN) {
392 }
else if (ms_remaining < 0) {
// A result of -1 is examined through errno: EINTR and EAGAIN/EWOULDBLOCK are
// distinguished from other errors.
400 if (complete == -1) {
401 if (errno == EINTR) {
403 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
// Save errno before later calls can overwrite it.
406 int orig_errno = errno;
422 static int full_cerr_write(
const char*
text) {
return full_write(2,
text); }
// Signals used to park and later release other threads while a stack trace is
// produced: PAUSE_SIGNAL is sent with pthread_kill() and handled by
// sig_pause_for_stacktrace; RESUME_SIGNAL is sent afterwards and handled by
// sig_resume_handler.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
// Parenthesized so the macro behaves as a single operand when it is expanded
// inside a larger expression (e.g. `PAUSE_SIGNAL - RESUME_SIGNAL`).
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)  // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif
/// Deliberately empty sa_sigaction-style handler; sig_dostack_then_abort
/// installs it for RESUME_SIGNAL. None of the arguments are used, so they are
/// left unnamed to avoid unused-parameter warnings.
/// NOTE(review): presumably its only purpose is to let RESUME_SIGNAL be
/// delivered (waking a paused thread) without any further action - confirm
/// against sig_pause_for_stacktrace.
void sig_resume_handler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {}
// sa_sigaction-style handler that sig_dostack_then_abort installs for
// PAUSE_SIGNAL. In the lines visible here it unblocks RESUME_SIGNAL for the
// calling thread and assembles a "\nModule: ..." line in a fixed-size buffer.
// NOTE(review): the declarations of `sigset` and `buff`, the wait for the
// resume signal and the output of the buffer are not visible in this view.
440 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Build a signal set holding only RESUME_SIGNAL and unblock it in this thread.
445 sigemptyset(&sigset);
446 sigaddset(&sigset, RESUME_SIGNAL);
447 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
// Every copy/append is bounded by moduleBufferSize.
457 strlcpy(buff,
"\nModule: ", moduleBufferSize);
462 strlcat(buff,
":", moduleBufferSize);
467 strlcat(buff,
"none", moduleBufferSize);
// sa_sigaction-style handler for fatal signals. From the lines visible here it:
// installs the pause/resume handlers, signals the threads in `tids` with
// PAUSE_SIGNAL, writes a description of the signal to stderr, signals the
// threads with RESUME_SIGNAL, reports the module of the crashing thread, and
// finally restores default signal dispositions.
// NOTE(review): many interior lines (including the stack-trace request itself
// and the final abort/raise) are absent from this view.
474 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
// Remember the crashing thread so it can be looked up in `tids` later.
479 const auto self = pthread_self();
// Route PAUSE_SIGNAL to sig_pause_for_stacktrace.
483 struct sigaction act;
484 act.sa_sigaction = sig_pause_for_stacktrace;
486 sigemptyset(&act.sa_mask);
487 sigaction(PAUSE_SIGNAL, &act,
nullptr);
// Make sure PAUSE_SIGNAL itself is not blocked.
490 sigset_t pausesigset;
491 sigemptyset(&pausesigset);
492 sigaddset(&pausesigset, PAUSE_SIGNAL);
493 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
// Send PAUSE_SIGNAL to threads recorded in `tids`.
496 for (
auto id : tids) {
498 pthread_kill(
id, PAUSE_SIGNAL);
// Reuse `act` to route RESUME_SIGNAL to the empty sig_resume_handler.
504 act.sa_sigaction = sig_resume_handler;
505 sigaction(RESUME_SIGNAL, &act,
nullptr);
// Human-readable name of the signal; stays "unknown" unless one of the
// assignments below runs (the selecting conditions are not visible here).
510 const char* signalname =
"unknown";
513 signalname =
"bus error";
517 signalname =
"segmentation violation";
521 signalname =
"illegal instruction";
525 signalname =
"external termination request";
529 signalname =
"abort signal";
535 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
536 full_cerr_write(signalname);
537 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// NOTE(review): presumably counts the threads for which pthread_kill() with
// RESUME_SIGNAL succeeded - the statement that updates it is not visible.
546 std::size_t notified = 0;
548 for (
auto id : tids) {
550 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
557 full_cerr_write(
"\nCurrent Modules:\n");
// If the crashing thread is one of the tracked threads, report its module with
// a " (crashed)" suffix; the "non-CMSSW (crashed)" line below is presumably the
// alternative branch - confirm.
564 if (tids.count(
self) > 0) {
565 char buff[moduleBufferSize] =
"\nModule: ";
570 strlcat(buff,
":", moduleBufferSize);
575 strlcat(buff,
"none", moduleBufferSize);
577 strlcat(buff,
" (crashed)", moduleBufferSize);
578 full_cerr_write(buff);
580 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// Sleep interval of 0 s + 1000 ns.
588 timespec
t = {0, 1000};
590 nanosleep(&
t,
nullptr);
598 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
599 full_cerr_write(signalname);
600 full_cerr_write(
"\n");
// For the five signals listed, restore the default disposition of the signal
// that was received.
604 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig ==
SIGTERM) || (sig == SIGABRT)) {
605 signal(sig, SIG_DFL);
608 set_default_signals();
// sa_sigaction-style handler that reports a fatal signal received during exit:
// it writes a fixed notice to stderr and restores default signal dispositions.
// NOTE(review): the statements between and after the visible lines (for
// example whatever re-raises the signal or aborts) are absent from this view.
613 void sig_abort(
int sig, siginfo_t*,
void*) {
614 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
// Default disposition for the signal being handled...
617 signal(sig, SIG_DFL);
// ...and for the signals covered by set_default_signals().
621 set_default_signals();
646 int result = full_read(fromParent,
buf, 1);
651 set_default_signals();
653 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
654 full_cerr_write(strerror(-
result));
655 full_cerr_write(
"\n");
659 set_default_signals();
661 full_write(toParent,
buf);
662 }
else if (
buf[0] ==
'2') {
669 }
else if (
buf[0] ==
'3') {
672 set_default_signals();
674 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
675 full_cerr_write(
buf);
676 full_cerr_write(
"\n");
685 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
686 full_cerr_write(strerror(-
result));
687 full_cerr_write(
"\n");
693 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
694 if (
result == -ETIMEDOUT) {
695 full_cerr_write(
"timed out waiting for GDB to complete.");
697 full_cerr_write(strerror(-
result));
699 full_cerr_write(
"\n");
705 char child_stack[4 * 1024];
706 char* child_stack_ptr = child_stack + 4 * 1024;
716 if (child_stack_ptr) {
723 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
726 if (waitpid(pid, &
status, 0) == -1) {
727 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
730 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
736 set_default_signals();
743 syscall(SYS_execve,
"/bin/sh",
argv, __environ);
745 execv(
"/bin/sh",
argv);
765 unloadSigHandler_(
pset.getUntrackedParameter<
bool>(
"UnloadRootSigHandler")),
766 resetErrHandler_(
pset.getUntrackedParameter<
bool>(
"ResetRootErrHandler")),
767 loadAllDictionaries_(
pset.getUntrackedParameter<
bool>(
"LoadAllDictionaries")),
768 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<
bool>(
"AutoLibraryLoader")),
769 interactiveDebug_(
pset.getUntrackedParameter<
bool>(
"InteractiveDebug")) {
783 gSystem->ResetSignal(kSigChild);
784 gSystem->ResetSignal(kSigBus);
785 gSystem->ResetSignal(kSigSegmentationViolation);
786 gSystem->ResetSignal(kSigIllegalInstruction);
787 gSystem->ResetSignal(kSigSystem);
788 gSystem->ResetSignal(kSigPipe);
789 gSystem->ResetSignal(kSigAlarm);
790 gSystem->ResetSignal(kSigUrgent);
791 gSystem->ResetSignal(kSigFloatingException);
792 gSystem->ResetSignal(kSigWindowChanged);
793 }
else if (
pset.getUntrackedParameter<
bool>(
"AbortOnSignal")) {
798 gSystem->ResetSignal(kSigBus);
799 gSystem->ResetSignal(kSigSegmentationViolation);
800 gSystem->ResetSignal(kSigIllegalInstruction);
811 signal(SIGABRT, SIG_DFL);
823 SetErrorHandler(RootErrorHandler);
828 gInterpreter->SetClassAutoloading(1);
832 TTree::SetMaxTreeSize(kMaxLong64);
833 TH1::AddDirectory(kFALSE);
850 bool imt =
pset.getUntrackedParameter<
bool>(
"EnableIMT");
851 if (imt && not ROOT::IsImplicitMTEnabled()) {
854 ROOT::EnableImplicitMT(tbb::global_control::active_value(tbb::global_control::max_allowed_parallelism));
860 TIter iter(gROOT->GetListOfFiles());
861 TObject*
obj =
nullptr;
862 while (
nullptr != (
obj = iter.Next())) {
863 TFile*
f = dynamic_cast<TFile*>(
obj);
868 iter = TIter(gROOT->GetListOfFiles());
877 ROOT::EnableThreadSafety();
880 TObject::SetObjectStat(
false);
883 TVirtualStreamerInfo::Optimize(
false);
888 desc.setComment(
"Centralized interface to ROOT.");
889 desc.addUntracked<
bool>(
"UnloadRootSigHandler",
false)
890 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
891 desc.addUntracked<
bool>(
"ResetRootErrHandler",
true)
893 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
894 desc.addUntracked<
bool>(
"AutoLibraryLoader",
true)
895 ->setComment(
"If True, enables automatic loading of data dictionaries.");
896 desc.addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
897 desc.addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
898 desc.addUntracked<
bool>(
"AbortOnSignal",
true)
900 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which "
901 "attempts to do a clean shutdown.");
902 desc.addUntracked<
bool>(
"InteractiveDebug",
false)
904 "If True, leave gdb attached to cmsRun after a crash; "
905 "if False, attach gdb, print a stack trace, and quit gdb");
906 desc.addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
907 desc.addUntracked<
int>(
"StackTracePauseTime", 300)
908 ->setComment(
"Seconds to pause other threads during stack trace.");
909 descriptions.
add(
"InitRootHandlers",
desc);
931 "set pagination no\n"
932 "thread apply all bt\n"
934 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
937 std::ostringstream sstr;
938 sstr <<
"Unable to pre-allocate stacktrace handler information";
956 std::ostringstream sstr;
957 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
967 std::ostringstream sstr;
968 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);