22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 45 #include "TInterpreter.h" 48 #include "TUnixSystem.h" 50 #include "TVirtualStreamerInfo.h" 52 #include "TClassTable.h" 59 constexpr std::size_t moduleBufferSize = 128;
146 bool s_ignoreEverything =
false;
/// Report whether `search` contains at least one of the C-string fragments in `substrs`.
///
/// @tparam SIZE    Number of candidate fragments in the table.
/// @param search   Text to scan.
/// @param substrs  Fixed-size table of NUL-terminated fragments, tried in table order.
/// @return true as soon as one fragment occurs anywhere in `search`; false if none do
///         (including when the table is empty).
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const s) -> bool {
    return search.find(s) != std::string::npos;
  });
}
// Fragments looked for in the text of a ROOT message: RootErrorHandlerImpl passes this
// table to find_if_string() together with el_message.
constexpr std::array<const char* const, 8> in_message{
    {"no dictionary for class",
     "already in TClassTable",
     "matrix not positive definite",
     "not a TStreamerInfo object",
     "Problems declaring payload",
     "Announced number of args different from the real number of argument passed",
     "nbins is <=0 - set to nbins = 1",
     "nbinsy is <=0 - set to nbinsy = 1"}};
165 constexpr std::array<const char* const, 6> in_location{{
"Fit",
166 "TDecompChol::Solve",
167 "THistPainter::PaintInit",
168 "TUnixSystem::SetDisplay",
169 "TGClient::GetFontByName",
// Fragments looked for in the text of a ROOT message: RootErrorHandlerImpl passes this
// table to find_if_string() together with el_message in the check that precedes setting
// its alreadyPrinted flag.
constexpr std::array<const char* const, 3> in_message_print{{"number of iterations was insufficient",
                                                             "bad integrand behavior",
                                                             "integral is divergent, or slowly convergent"}};
176 void RootErrorHandlerImpl(
int level,
char const* location,
char const* message) {
183 if (level >= kFatal) {
185 }
else if (level >= kSysError) {
187 }
else if (level >= kError) {
189 }
else if (level >= kWarning) {
193 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
201 if (location !=
nullptr)
205 if (message !=
nullptr)
206 el_message = message;
217 size_t index1 = el_message.find(precursor);
218 if (index1 != std::string::npos) {
219 size_t index2 = index1 + precursor.length();
220 size_t index3 = el_message.find_first_of(
" :", index2);
221 if (index3 != std::string::npos) {
222 size_t substrlen = index3 - index2;
223 el_identifier +=
"-";
224 el_identifier += el_message.substr(index2, substrlen);
227 index1 = el_location.find(
"::");
228 if (index1 != std::string::npos) {
229 el_identifier +=
"/";
230 el_identifier += el_location.substr(0, index1);
236 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
237 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
238 (el_message.find(
"not set") != std::string::npos)) {
242 if ((el_message.find(
"Tree branches") != std::string::npos) &&
243 (el_message.find(
"different numbers of entries") != std::string::npos)) {
249 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
250 (level < kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
251 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
257 bool alreadyPrinted =
false;
258 if (find_if_string(el_message, in_message_print)) {
261 alreadyPrinted =
true;
276 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
277 std::ostringstream sstr;
278 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
281 except.clearMessage();
288 if (!alreadyPrinted) {
298 edm::LogInfo(
"Root_Information") << el_location << el_message;
303 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
304 RootErrorHandlerImpl(level, location, message);
/// Reset SIGILL, SIGSEGV, SIGBUS, SIGTERM and SIGABRT to their default disposition
/// (SIG_DFL), in that order.
void set_default_signals() {
  signal(SIGILL, SIG_DFL);
  signal(SIGSEGV, SIG_DFL);
  signal(SIGBUS, SIG_DFL);
  signal(SIGTERM, SIG_DFL);
  signal(SIGABRT, SIG_DFL);
}
316 static int full_write(
int fd,
const char*
text) {
318 size_t count = strlen(text);
321 written =
write(fd, buffer, count);
323 if (errno == EINTR) {
335 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
338 ssize_t complete = 0;
339 std::chrono::time_point<std::chrono::steady_clock> end_time =
344 }
else if ((-1 == (flags = fcntl(fd, F_GETFL)))) {
348 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
353 if (timeout_s >= 0) {
354 struct pollfd poll_info {
359 if (ms_remaining > 0) {
360 int rc = poll(&poll_info, 1, ms_remaining);
363 if (errno == EINTR || errno == EAGAIN) {
370 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
371 fcntl(fd, F_SETFL, flags);
375 }
else if (ms_remaining < 0) {
376 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
377 fcntl(fd, F_SETFL, flags);
382 complete = read(fd, buf, count);
383 if (complete == -1) {
384 if (errno == EINTR) {
386 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
389 int orig_errno = errno;
390 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
391 fcntl(fd, F_SETFL, flags);
399 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
400 fcntl(fd, F_SETFL, flags);
405 static int full_cerr_write(
const char* text) {
return full_write(2, text); }
411 #if defined(SIGRTMAX) 412 #define PAUSE_SIGNAL SIGRTMAX 413 #define RESUME_SIGNAL SIGRTMAX - 1 414 #elif defined(SIGINFO) // macOS/BSD 415 #define PAUSE_SIGNAL SIGINFO 416 #define RESUME_SIGNAL SIGALRM 420 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
423 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
428 sigemptyset(&sigset);
429 sigaddset(&sigset, RESUME_SIGNAL);
430 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
440 strlcpy(buff,
"\nModule: ", moduleBufferSize);
445 strlcat(buff,
":", moduleBufferSize);
450 strlcat(buff,
"none", moduleBufferSize);
457 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
462 const auto self = pthread_self();
466 struct sigaction act;
467 act.sa_sigaction = sig_pause_for_stacktrace;
469 sigemptyset(&act.sa_mask);
470 sigaction(PAUSE_SIGNAL, &act,
nullptr);
473 sigset_t pausesigset;
474 sigemptyset(&pausesigset);
475 sigaddset(&pausesigset, PAUSE_SIGNAL);
476 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
479 for (
auto id : tids) {
481 pthread_kill(
id, PAUSE_SIGNAL);
487 act.sa_sigaction = sig_resume_handler;
488 sigaction(RESUME_SIGNAL, &act,
nullptr);
493 const char* signalname =
"unknown";
496 signalname =
"bus error";
500 signalname =
"segmentation violation";
504 signalname =
"illegal instruction";
508 signalname =
"external termination request";
512 signalname =
"abort signal";
518 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
519 full_cerr_write(signalname);
520 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
529 std::size_t notified = 0;
531 for (
auto id : tids) {
533 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
540 full_cerr_write(
"\nCurrent Modules:\n");
547 if (tids.count(
self) > 0) {
548 char buff[moduleBufferSize] =
"\nModule: ";
553 strlcat(buff,
":", moduleBufferSize);
558 strlcat(buff,
"none", moduleBufferSize);
560 strlcat(buff,
" (crashed)", moduleBufferSize);
561 full_cerr_write(buff);
563 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
571 timespec
t = {0, 1000};
573 nanosleep(&t,
nullptr);
581 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
582 full_cerr_write(signalname);
583 full_cerr_write(
"\n");
587 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT)) {
588 signal(sig, SIG_DFL);
591 set_default_signals();
596 void sig_abort(
int sig, siginfo_t*,
void*) {
597 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
600 signal(sig, SIG_DFL);
604 set_default_signals();
629 int result = full_read(fromParent, buf, 1);
634 set_default_signals();
636 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
637 full_cerr_write(strerror(-result));
638 full_cerr_write(
"\n");
642 set_default_signals();
644 full_write(toParent, buf);
645 }
else if (buf[0] ==
'2') {
652 }
else if (buf[0] ==
'3') {
655 set_default_signals();
657 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
658 full_cerr_write(buf);
659 full_cerr_write(
"\n");
668 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
669 full_cerr_write(strerror(-result));
670 full_cerr_write(
"\n");
675 if ((result = full_read(
childToParent_[0], buf, 1, 5 * 60)) < 0) {
676 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
677 if (result == -ETIMEDOUT) {
678 full_cerr_write(
"timed out waiting for GDB to complete.");
680 full_cerr_write(strerror(-result));
682 full_cerr_write(
"\n");
688 char child_stack[4 * 1024];
689 char* child_stack_ptr = child_stack + 4 * 1024;
699 if (child_stack_ptr) {
706 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
709 if (waitpid(pid, &status, 0) == -1) {
710 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
713 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
719 set_default_signals();
726 syscall(SYS_execve,
"/bin/sh", argv, __environ);
728 execv(
"/bin/sh", argv);
756 gSystem->ResetSignal(kSigChild);
757 gSystem->ResetSignal(kSigBus);
758 gSystem->ResetSignal(kSigSegmentationViolation);
759 gSystem->ResetSignal(kSigIllegalInstruction);
760 gSystem->ResetSignal(kSigSystem);
761 gSystem->ResetSignal(kSigPipe);
762 gSystem->ResetSignal(kSigAlarm);
763 gSystem->ResetSignal(kSigUrgent);
764 gSystem->ResetSignal(kSigFloatingException);
765 gSystem->ResetSignal(kSigWindowChanged);
771 gSystem->ResetSignal(kSigBus);
772 gSystem->ResetSignal(kSigSegmentationViolation);
773 gSystem->ResetSignal(kSigIllegalInstruction);
784 signal(SIGABRT, SIG_DFL);
796 SetErrorHandler(RootErrorHandler);
801 gInterpreter->SetClassAutoloading(1);
805 TTree::SetMaxTreeSize(kMaxLong64);
806 TH1::AddDirectory(kFALSE);
813 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int>>))) {
818 if (debugLevel > 0) {
824 if (imt && not ROOT::IsImplicitMTEnabled()) {
825 ROOT::EnableImplicitMT();
831 TIter iter(gROOT->GetListOfFiles());
832 TObject*
obj =
nullptr;
833 while (
nullptr != (obj = iter.Next())) {
834 TFile*
f =
dynamic_cast<TFile*
>(
obj);
839 iter = TIter(gROOT->GetListOfFiles());
846 ROOT::EnableThreadSafety();
849 TObject::SetObjectStat(
false);
852 TVirtualStreamerInfo::Optimize(
false);
857 desc.
setComment(
"Centralized interface to ROOT.");
859 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
862 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
864 ->setComment(
"If True, enables automatic loading of data dictionaries.");
865 desc.
addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
866 desc.
addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
869 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which " 870 "attempts to do a clean shutdown.");
871 desc.
addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
873 ->setComment(
"Seconds to pause other threads during stack trace.");
874 descriptions.
add(
"InitRootHandlers", desc);
892 "date; gdb -quiet -p %d 2>&1 <<EOF |\n" 895 "set pagination no\n" 896 "thread apply all bt\n" 898 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
900 std::ostringstream sstr;
901 sstr <<
"Unable to pre-allocate stacktrace handler information";
919 std::ostringstream sstr;
920 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
930 std::ostringstream sstr;
931 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
bool loadAllDictionaries_
void enableWarnings_() override
static void cmssw_stacktrace_fork()
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
bool isProcessWideService(TFileService const *)
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
static ModuleCallingContext const * getCurrentModuleOnThread()
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
~InitRootHandlers() override
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
void on_scheduler_entry(bool) override