22 #include "tbb/task_scheduler_observer.h" 23 #include "tbb/concurrent_unordered_set.h" 45 #include "TInterpreter.h" 48 #include "TUnixSystem.h" 50 #include "TVirtualStreamerInfo.h" 52 #include "TClassTable.h" 59 constexpr std::size_t moduleBufferSize = 128;
// Global switch consulted by RootErrorHandlerImpl in the test
// `s_ignoreEverything || el_severity <= s_ignoreWarnings`; when true that test
// succeeds regardless of the message severity. Starts out disabled.
bool s_ignoreEverything = false;
/// Report whether `search` contains at least one of the patterns in `substrs`.
///
/// @tparam SIZE    number of patterns in the table.
/// @param search   text to scan.
/// @param substrs  table of NUL-terminated patterns; each one is looked up with
///                 std::string::find, i.e. it may match anywhere inside `search`.
/// @return true if any pattern occurs in `search`, false otherwise (always false
///         for an empty table).
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  // any_of expresses "does some pattern match" directly, instead of comparing a
  // find_if result against end().
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const s) -> bool {
    return search.find(s) != std::string::npos;
  });
}
// Message fragments passed to find_if_string(el_message, in_message) inside
// RootErrorHandlerImpl. A message matches when it contains any one of these
// substrings.
constexpr std::array<const char* const, 8> in_message{
    {"no dictionary for class",
     "already in TClassTable",
     "matrix not positive definite",
     "not a TStreamerInfo object",
     "Problems declaring payload",
     "Announced number of args different from the real number of argument passed",
     "nbins is <=0 - set to nbins = 1",
     "nbinsy is <=0 - set to nbinsy = 1"}};
// Location fragments passed to find_if_string(el_location, in_location) inside
// RootErrorHandlerImpl. A location matches when it contains any one of these
// substrings.
// NOTE(review): the array is declared with 6 entries, but only 5 initializers and
// no closing braces are visible here; the remainder appears to be missing from
// this listing. Confirm against the full file.
165 constexpr std::array<const char* const, 6> in_location{{
"Fit",
166 "TDecompChol::Solve",
167 "THistPainter::PaintInit",
168 "TUnixSystem::SetDisplay",
169 "TGClient::GetFontByName",
// Message fragments passed to find_if_string(el_message, in_message_print) inside
// RootErrorHandlerImpl; a match sets that function's `alreadyPrinted` flag.
constexpr std::array<const char* const, 4> in_message_print{
    {"number of iterations was insufficient",
     "bad integrand behavior",
     "integral is divergent, or slowly convergent",
     "but fEntryCurrent should not be in between the two"}};
// Implementation behind RootErrorHandler: receives one ROOT diagnostic and decides
// how it is reported.
//   level    - ROOT severity; compared against kFatal, kSysError, kError, kWarning.
//   location - origin string supplied with the diagnostic; checked for null before use.
//   message  - diagnostic text; checked for null before being copied to el_message.
// NOTE(review): many statements of this function are not visible in this listing
// (declarations of el_severity, el_location, el_message, el_identifier, precursor,
// die, except and most branch bodies). The comments below describe only what the
// visible lines show.
177 void RootErrorHandlerImpl(
int level,
char const* location,
char const* message) {
// Severity ladder: the highest threshold is tested first.
184 if (level >= kFatal) {
186 }
else if (level >= kSysError) {
188 }
else if (level >= kError) {
190 }
else if (level >= kWarning) {
// Ignore test: either the global switch is on, or the severity does not exceed
// the s_ignoreWarnings threshold.
194 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
// Both C strings may be null, so each is guarded before use.
202 if (location !=
nullptr)
206 if (message !=
nullptr)
207 el_message = message;
// If `precursor` occurs in the message, the text between it and the next ' ' or
// ':' is appended to el_identifier after a "-".
218 size_t index1 = el_message.find(precursor);
219 if (index1 != std::string::npos) {
220 size_t index2 = index1 + precursor.length();
221 size_t index3 = el_message.find_first_of(
" :", index2);
222 if (index3 != std::string::npos) {
223 size_t substrlen = index3 - index2;
224 el_identifier +=
"-";
225 el_identifier += el_message.substr(index2, substrlen);
// If the location contains "::", the part before it is appended to el_identifier
// after a "/".
228 index1 = el_location.find(
"::");
229 if (index1 != std::string::npos) {
230 el_identifier +=
"/";
231 el_identifier += el_location.substr(0, index1);
// Special case: location mentions TBranchElement::Fill and the message contains
// "fill branch", "address" and "not set" (branch body not visible here).
237 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
238 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
239 (el_message.find(
"not set") != std::string::npos)) {
// Special case: message mentions "Tree branches" with "different numbers of
// entries" (branch body not visible here).
243 if ((el_message.find(
"Tree branches") != std::string::npos) &&
244 (el_message.find(
"different numbers of entries") != std::string::npos)) {
// Table-driven match: message against in_message, location against in_location,
// plus one below-kError case for CINTTypedefBuilder::Setup (branch body not
// visible here).
250 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
251 (level < kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
252 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
// alreadyPrinted is raised when the message matches in_message_print; it is
// tested again further down before the final logging.
258 bool alreadyPrinted =
false;
259 if (find_if_string(el_message, in_message_print)) {
262 alreadyPrinted =
true;
// When `die` is set and the location is not the DispatchSignals marker, a
// "Fatal Root Error" text is assembled from location and message.
277 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
278 std::ostringstream sstr;
279 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
282 except.clearMessage();
289 if (!alreadyPrinted) {
// Report through edm::LogInfo under the "Root_Information" category.
299 edm::LogInfo(
"Root_Information") << el_location << el_message;
304 void RootErrorHandler(
int level,
bool,
char const* location,
char const* message) {
305 RootErrorHandlerImpl(level, location, message);
// Restore the default disposition (SIG_DFL) for SIGILL, SIGSEGV, SIGBUS, SIGTERM
// and SIGABRT. Called from the signal handlers and stack-trace helpers in this
// file once they are done with their own handling.
void set_default_signals() {
  signal(SIGILL, SIG_DFL);
  signal(SIGSEGV, SIG_DFL);
  signal(SIGBUS, SIG_DFL);
  signal(SIGTERM, SIG_DFL);
  signal(SIGABRT, SIG_DFL);
}
// Write the NUL-terminated string `text` to file descriptor `fd` using write(2).
//   fd   - destination descriptor (full_cerr_write passes 2).
//   text - C string to emit; its length is taken with strlen.
// NOTE(review): the declarations of `written`/`buffer`, the surrounding loop and
// the return statements are not visible in this listing. The EINTR test below
// presumably retries an interrupted write — confirm against the full file.
317 static int full_write(
int fd,
const char*
text) {
319 size_t count = strlen(text);
322 written =
write(fd, buffer, count);
324 if (errno == EINTR) {
// Read from `fd` into `inbuf`, optionally bounded by a timeout.
//   fd        - descriptor to read from.
//   inbuf     - destination buffer.
//   len       - number of bytes requested.
//   timeout_s - timeout in seconds; the poll() path is taken only when it is >= 0
//               (the default of -1 skips it).
// Callers in this file pass a negative result to strerror(-result) and compare it
// with -ETIMEDOUT, so failures are presumably reported as negated errno values —
// the return statements themselves are not visible in this listing.
// NOTE(review): several statements (declarations of flags, buf, count,
// ms_remaining, and the loop structure) are missing here; the comments cover only
// the visible lines.
336 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
339 ssize_t complete = 0;
340 std::chrono::time_point<std::chrono::steady_clock> end_time =
345 }
// Fetch the current descriptor flags; -1 from fcntl signals failure.
else if ((-1 == (flags = fcntl(fd, F_GETFL)))) {
// Add O_NONBLOCK on top of the existing flags.
349 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
354 if (timeout_s >= 0) {
355 struct pollfd poll_info {
// Time left: wait in poll() for at most ms_remaining milliseconds.
360 if (ms_remaining > 0) {
361 int rc = poll(&poll_info, 1, ms_remaining);
364 if (errno == EINTR || errno == EAGAIN) {
// The original flags are written back only when O_NONBLOCK was not set in them
// to begin with; the same guard repeats on every exit path shown below.
371 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
372 fcntl(fd, F_SETFL, flags);
376 }
else if (ms_remaining < 0) {
377 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
378 fcntl(fd, F_SETFL, flags);
383 complete = read(fd, buf, count);
384 if (complete == -1) {
385 if (errno == EINTR) {
387 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
// errno is saved before fcntl() runs, since that call may overwrite it.
390 int orig_errno = errno;
391 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
392 fcntl(fd, F_SETFL, flags);
400 if ((flags & O_NONBLOCK) != O_NONBLOCK) {
401 fcntl(fd, F_SETFL, flags);
406 static int full_cerr_write(
const char* text) {
return full_write(2, text); }
// Signals used to coordinate threads during the stack-trace dump:
// sig_dostack_then_abort sends PAUSE_SIGNAL and later RESUME_SIGNAL to the tracked
// threads with pthread_kill. Real-time signals are used where SIGRTMAX exists;
// otherwise SIGINFO/SIGALRM are used.
#if defined(SIGRTMAX)
#define PAUSE_SIGNAL SIGRTMAX
// Parenthesised so the expansion remains a single operand in any expression.
#define RESUME_SIGNAL (SIGRTMAX - 1)
#elif defined(SIGINFO)  // macOS/BSD
#define PAUSE_SIGNAL SIGINFO
#define RESUME_SIGNAL SIGALRM
#endif

// Handler installed for RESUME_SIGNAL by sig_dostack_then_abort. The body is
// intentionally empty; all three arguments are ignored.
void sig_resume_handler(int sig, siginfo_t*, void*) {}
// Handler installed for PAUSE_SIGNAL by sig_dostack_then_abort.
// Visible behaviour: it unblocks RESUME_SIGNAL for the calling thread, then
// assembles a "\nModule: " line in `buff`, bounded by moduleBufferSize, with
// strlcpy/strlcat ("none" is one of the possible suffixes).
// NOTE(review): the declarations of `sigset` and `buff`, the wait for the resume
// signal and the conditions choosing between the suffixes are not visible in this
// listing.
424 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
// Build a signal set containing only RESUME_SIGNAL and unblock it in this thread.
429 sigemptyset(&sigset);
430 sigaddset(&sigset, RESUME_SIGNAL);
431 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
// Bounded string building: every call passes moduleBufferSize as the limit.
441 strlcpy(buff,
"\nModule: ", moduleBufferSize);
446 strlcat(buff,
":", moduleBufferSize);
451 strlcat(buff,
"none", moduleBufferSize);
// Signal handler that reports a fatal signal on stderr and then restores default
// signal handling.
//   sig - the signal being handled; selects the printed name and, for
//         SIGILL/SIGSEGV/SIGBUS/SIGTERM/SIGABRT, is reset to SIG_DFL at the end.
// Visible sequence: install sig_pause_for_stacktrace for PAUSE_SIGNAL and send
// that signal to the threads in `tids`; install sig_resume_handler for
// RESUME_SIGNAL; print the signal name; send RESUME_SIGNAL to the threads; print
// the "Current Modules" section; finally reset signal dispositions.
// NOTE(review): many statements (the definition of `tids`, the stack-trace call,
// the switch selecting `signalname`, the waiting loop conditions) are not visible
// in this listing; the comments cover only the lines shown.
458 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
// Remember the current thread; it is looked up in `tids` further down.
463 const auto self = pthread_self();
// Install the pause handler with an empty sa_mask.
467 struct sigaction act;
468 act.sa_sigaction = sig_pause_for_stacktrace;
470 sigemptyset(&act.sa_mask);
471 sigaction(PAUSE_SIGNAL, &act,
nullptr);
// Make sure PAUSE_SIGNAL is not blocked.
474 sigset_t pausesigset;
475 sigemptyset(&pausesigset);
476 sigaddset(&pausesigset, PAUSE_SIGNAL);
477 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
// Deliver PAUSE_SIGNAL to the threads listed in `tids`.
480 for (
auto id : tids) {
482 pthread_kill(
id, PAUSE_SIGNAL);
// Re-use `act` to install the empty resume handler for RESUME_SIGNAL.
488 act.sa_sigaction = sig_resume_handler;
489 sigaction(RESUME_SIGNAL, &act,
nullptr);
// Human-readable name for the signal; stays "unknown" unless one of the
// assignments below runs (their selecting conditions are not visible here).
494 const char* signalname =
"unknown";
497 signalname =
"bus error";
501 signalname =
"segmentation violation";
505 signalname =
"illegal instruction";
509 signalname =
"external termination request";
513 signalname =
"abort signal";
519 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
520 full_cerr_write(signalname);
521 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
// Send RESUME_SIGNAL to the threads; a pthread_kill result of 0 is tested for
// (presumably to count successes in `notified` — the counting statement itself is
// not visible here).
530 std::size_t notified = 0;
532 for (
auto id : tids) {
534 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
541 full_cerr_write(
"\nCurrent Modules:\n");
// If the current thread is one of the tracked threads, build its own
// "Module: ... (crashed)" line; the other message reports a non-CMSSW thread.
548 if (tids.count(
self) > 0) {
549 char buff[moduleBufferSize] =
"\nModule: ";
554 strlcat(buff,
":", moduleBufferSize);
559 strlcat(buff,
"none", moduleBufferSize);
561 strlcat(buff,
" (crashed)", moduleBufferSize);
562 full_cerr_write(buff);
564 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
// Short sleep of {0 s, 1000 ns} per nanosleep call.
572 timespec
t = {0, 1000};
574 nanosleep(&t,
nullptr);
582 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
583 full_cerr_write(signalname);
584 full_cerr_write(
"\n");
// For the five handled signals, restore the default disposition of `sig` itself.
588 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGTERM) || (sig == SIGABRT)) {
589 signal(sig, SIG_DFL);
592 set_default_signals();
// Signal handler that writes a fixed "during exit" notice to stderr and restores
// default signal dispositions: first for `sig` itself, then for the set covered by
// set_default_signals().
// NOTE(review): the statements between the visible lines (and whatever follows
// the reset) are not shown in this listing.
597 void sig_abort(
int sig, siginfo_t*,
void*) {
598 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
601 signal(sig, SIG_DFL);
605 set_default_signals();
630 int result = full_read(fromParent, buf, 1);
635 set_default_signals();
637 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
638 full_cerr_write(strerror(-result));
639 full_cerr_write(
"\n");
643 set_default_signals();
645 full_write(toParent, buf);
646 }
else if (buf[0] ==
'2') {
653 }
else if (buf[0] ==
'3') {
656 set_default_signals();
658 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
659 full_cerr_write(buf);
660 full_cerr_write(
"\n");
669 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
670 full_cerr_write(strerror(-result));
671 full_cerr_write(
"\n");
676 if ((result = full_read(
childToParent_[0], buf, 1, 5 * 60)) < 0) {
677 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
678 if (result == -ETIMEDOUT) {
679 full_cerr_write(
"timed out waiting for GDB to complete.");
681 full_cerr_write(strerror(-result));
683 full_cerr_write(
"\n");
689 char child_stack[4 * 1024];
690 char* child_stack_ptr = child_stack + 4 * 1024;
700 if (child_stack_ptr) {
707 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
710 if (waitpid(pid, &status, 0) == -1) {
711 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
714 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
720 set_default_signals();
727 syscall(SYS_execve,
"/bin/sh", argv, __environ);
729 execv(
"/bin/sh", argv);
757 gSystem->ResetSignal(kSigChild);
758 gSystem->ResetSignal(kSigBus);
759 gSystem->ResetSignal(kSigSegmentationViolation);
760 gSystem->ResetSignal(kSigIllegalInstruction);
761 gSystem->ResetSignal(kSigSystem);
762 gSystem->ResetSignal(kSigPipe);
763 gSystem->ResetSignal(kSigAlarm);
764 gSystem->ResetSignal(kSigUrgent);
765 gSystem->ResetSignal(kSigFloatingException);
766 gSystem->ResetSignal(kSigWindowChanged);
772 gSystem->ResetSignal(kSigBus);
773 gSystem->ResetSignal(kSigSegmentationViolation);
774 gSystem->ResetSignal(kSigIllegalInstruction);
785 signal(SIGABRT, SIG_DFL);
797 SetErrorHandler(RootErrorHandler);
802 gInterpreter->SetClassAutoloading(1);
806 TTree::SetMaxTreeSize(kMaxLong64);
807 TH1::AddDirectory(kFALSE);
814 if (!
hasDictionary(
typeid(std::vector<std::vector<unsigned int>>))) {
819 if (debugLevel > 0) {
825 if (imt && not ROOT::IsImplicitMTEnabled()) {
826 ROOT::EnableImplicitMT();
832 TIter iter(gROOT->GetListOfFiles());
833 TObject*
obj =
nullptr;
834 while (
nullptr != (obj = iter.Next())) {
835 TFile*
f =
dynamic_cast<TFile*
>(
obj);
840 iter = TIter(gROOT->GetListOfFiles());
847 ROOT::EnableThreadSafety();
850 TObject::SetObjectStat(
false);
853 TVirtualStreamerInfo::Optimize(
false);
858 desc.
setComment(
"Centralized interface to ROOT.");
860 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
863 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
865 ->setComment(
"If True, enables automatic loading of data dictionaries.");
866 desc.
addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
867 desc.
addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
870 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which " 871 "attempts to do a clean shutdown.");
872 desc.
addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
874 ->setComment(
"Seconds to pause other threads during stack trace.");
875 descriptions.
add(
"InitRootHandlers", desc);
893 "date; gdb -quiet -p %d 2>&1 <<EOF |\n" 896 "set pagination no\n" 897 "thread apply all bt\n" 899 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'",
901 std::ostringstream sstr;
902 sstr <<
"Unable to pre-allocate stacktrace handler information";
920 std::ostringstream sstr;
921 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
931 std::ostringstream sstr;
932 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
unsigned int maxNumberOfThreads() const
T getUntrackedParameter(std::string const &, T const &) const
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
bool loadAllDictionaries_
void enableWarnings_() override
static void cmssw_stacktrace_fork()
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static void stacktraceFromThread()
static char *const pstackArgv_[]
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Container_type threadIDs_
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
bool isProcessWideService(TFileService const *)
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
static ModuleCallingContext const * getCurrentModuleOnThread()
std::vector< Variable::Flags > flags
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
friend int cmssw_stacktrace(void *)
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
std::shared_ptr< const void > sigBusHandler_
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
void setComment(std::string const &value)
std::string moduleName(Provenance const &provenance)
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static ThreadTracker threadTracker_
static char *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
tbb::concurrent_unordered_set< pthread_t > Container_type
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
~InitRootHandlers() override
static const int pidStringLength_
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
void on_scheduler_entry(bool) override