19 #include "oneapi/tbb/concurrent_unordered_set.h" 20 #include "oneapi/tbb/task.h" 21 #include "oneapi/tbb/task_scheduler_observer.h" 22 #include "oneapi/tbb/global_control.h" 46 #include "TInterpreter.h" 49 #include "TUnixSystem.h" 51 #include "TVirtualStreamerInfo.h" 53 #include "TClassTable.h" 60 constexpr std::size_t moduleBufferSize = 128;
// Compile-time switch consulted by RootErrorHandlerImpl: it is OR-ed with the
// `el_severity <= s_ignoreWarnings` comparison, so turning it on would make
// that ignore check succeed for every message regardless of severity.
constexpr bool s_ignoreEverything = false;
/// Reports whether `search` contains at least one of the fragments in `substrs`.
///
/// @tparam SIZE    number of entries in the fragment table
/// @param search   text to scan
/// @param substrs  table of NUL-terminated fragments; every entry must be a
///                 valid (non-null) C string because it is handed to
///                 std::string::find
/// @return true if any fragment occurs anywhere in `search`, false otherwise
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  // any_of states the intent directly and stops at the first matching fragment.
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const fragment) -> bool {
    return search.find(fragment) != std::string::npos;
  });
}
// Fragments that RootErrorHandlerImpl looks for inside the ROOT message text
// (through find_if_string). What the handler does on a match is decided at the
// call site, not here. Keep the declared size in step with the entry count.
constexpr std::array<const char* const, 9> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "oneapi::tbb::global_control is limiting",
}};
180 constexpr std::array<const char* const, 7> in_location{{
"Fit",
181 "TDecompChol::Solve",
182 "THistPainter::PaintInit",
183 "TUnixSystem::SetDisplay",
184 "TGClient::GetFontByName",
186 "RTaskArenaWrapper"}};
// Fragments that RootErrorHandlerImpl searches for in the message text
// (through find_if_string); on a match the handler sets its `alreadyPrinted`
// flag. Keep the declared size in step with the entry count.
constexpr std::array<const char* const, 3> in_message_print_error{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
}};
192 void RootErrorHandlerImpl(
int level,
char const*
location,
char const* message) {
199 if (
level >= kFatal) {
201 }
else if (
level >= kSysError) {
209 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
221 if (message !=
nullptr)
222 el_message = message;
233 size_t index1 = el_message.find(precursor);
234 if (index1 != std::string::npos) {
235 size_t index2 = index1 + precursor.length();
236 size_t index3 = el_message.find_first_of(
" :", index2);
237 if (index3 != std::string::npos) {
238 size_t substrlen = index3 - index2;
239 el_identifier +=
"-";
240 el_identifier += el_message.substr(index2, substrlen);
243 index1 = el_location.find(
"::");
244 if (index1 != std::string::npos) {
245 el_identifier +=
"/";
246 el_identifier += el_location.substr(0, index1);
252 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
253 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
254 (el_message.find(
"not set") != std::string::npos)) {
258 if ((el_message.find(
"Tree branches") != std::string::npos) &&
259 (el_message.find(
"different numbers of entries") != std::string::npos)) {
265 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
266 (
level <
kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
267 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
273 bool alreadyPrinted =
false;
274 if (find_if_string(el_message, in_message_print_error)) {
277 alreadyPrinted =
true;
292 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
293 std::ostringstream sstr;
294 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
297 except.clearMessage();
304 if (!alreadyPrinted) {
314 edm::LogInfo(
"Root_Information") << el_location << el_message;
319 void RootErrorHandler(
int level,
bool,
char const*
location,
char const* message) {
324 void set_default_signals() {
325 signal(SIGILL, SIG_DFL);
326 signal(SIGSEGV, SIG_DFL);
327 signal(SIGBUS, SIG_DFL);
329 signal(SIGFPE, SIG_DFL);
330 signal(SIGABRT, SIG_DFL);
333 static int full_write(
int fd,
const char*
text) {
340 if (errno == EINTR) {
352 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
355 ssize_t complete = 0;
356 std::chrono::time_point<std::chrono::steady_clock> end_time =
361 }
else if ((-1 == (
flags = fcntl(
fd, F_GETFL)))) {
370 if (timeout_s >= 0) {
371 struct pollfd poll_info {
376 if (ms_remaining > 0) {
377 int rc = poll(&poll_info, 1, ms_remaining);
380 if (errno == EINTR || errno == EAGAIN) {
392 }
else if (ms_remaining < 0) {
400 if (complete == -1) {
401 if (errno == EINTR) {
403 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
406 int orig_errno = errno;
422 static int full_cerr_write(
const char*
text) {
return full_write(2,
text); }
428 #if defined(SIGRTMAX) 429 #define PAUSE_SIGNAL SIGRTMAX 430 #define RESUME_SIGNAL SIGRTMAX - 1 431 #elif defined(SIGINFO) // macOS/BSD 432 #define PAUSE_SIGNAL SIGINFO 433 #define RESUME_SIGNAL SIGALRM 437 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
440 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
445 sigemptyset(&sigset);
446 sigaddset(&sigset, RESUME_SIGNAL);
447 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
457 strlcpy(buff,
"\nModule: ", moduleBufferSize);
462 strlcat(buff,
":", moduleBufferSize);
467 strlcat(buff,
"none", moduleBufferSize);
474 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
479 const auto self = pthread_self();
483 struct sigaction act;
484 act.sa_sigaction = sig_pause_for_stacktrace;
486 sigemptyset(&act.sa_mask);
487 sigaction(PAUSE_SIGNAL, &act,
nullptr);
490 sigset_t pausesigset;
491 sigemptyset(&pausesigset);
492 sigaddset(&pausesigset, PAUSE_SIGNAL);
493 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
496 for (
auto id : tids) {
498 pthread_kill(
id, PAUSE_SIGNAL);
504 act.sa_sigaction = sig_resume_handler;
505 sigaction(RESUME_SIGNAL, &act,
nullptr);
510 const char* signalname =
"unknown";
513 signalname =
"bus error";
517 signalname =
"segmentation violation";
521 signalname =
"illegal instruction";
525 signalname =
"floating point exception";
529 signalname =
"external termination request";
533 signalname =
"abort signal";
539 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
540 full_cerr_write(signalname);
541 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
550 std::size_t notified = 0;
552 for (
auto id : tids) {
554 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
561 full_cerr_write(
"\nCurrent Modules:\n");
568 if (tids.count(
self) > 0) {
569 char buff[moduleBufferSize] =
"\nModule: ";
574 strlcat(buff,
":", moduleBufferSize);
579 strlcat(buff,
"none", moduleBufferSize);
581 strlcat(buff,
" (crashed)", moduleBufferSize);
582 full_cerr_write(buff);
584 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
592 timespec
t = {0, 1000};
594 nanosleep(&
t,
nullptr);
602 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
603 full_cerr_write(signalname);
604 full_cerr_write(
"\n");
608 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig ==
SIGTERM) || (sig == SIGFPE) ||
610 signal(sig, SIG_DFL);
613 set_default_signals();
618 void sig_abort(
int sig, siginfo_t*,
void*) {
619 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
622 signal(sig, SIG_DFL);
626 set_default_signals();
651 int result = full_read(fromParent,
buf, 1);
656 set_default_signals();
658 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
659 full_cerr_write(strerror(-
result));
660 full_cerr_write(
"\n");
664 set_default_signals();
666 full_write(toParent,
buf);
667 }
else if (
buf[0] ==
'2') {
674 }
else if (
buf[0] ==
'3') {
677 set_default_signals();
679 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
680 full_cerr_write(
buf);
681 full_cerr_write(
"\n");
690 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
691 full_cerr_write(strerror(-
result));
692 full_cerr_write(
"\n");
698 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
699 if (
result == -ETIMEDOUT) {
700 full_cerr_write(
"timed out waiting for GDB to complete.");
702 full_cerr_write(strerror(-
result));
704 full_cerr_write(
"\n");
710 char child_stack[4 * 1024];
711 char* child_stack_ptr = child_stack + 4 * 1024;
721 if (child_stack_ptr) {
728 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
731 if (waitpid(pid, &
status, 0) == -1) {
732 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
735 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
741 set_default_signals();
748 syscall(SYS_execve,
"/bin/sh",
argv, __environ);
750 execv(
"/bin/sh",
argv);
756 static constexpr
char pstackName[] =
"(CMSSW stack trace helper)";
757 static constexpr
char dashC[] =
"-c";
770 unloadSigHandler_(
pset.getUntrackedParameter<
bool>(
"UnloadRootSigHandler")),
771 resetErrHandler_(
pset.getUntrackedParameter<
bool>(
"ResetRootErrHandler")),
772 loadAllDictionaries_(
pset.getUntrackedParameter<
bool>(
"LoadAllDictionaries")),
773 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<
bool>(
"AutoLibraryLoader")),
774 interactiveDebug_(
pset.getUntrackedParameter<
bool>(
"InteractiveDebug")) {
788 gSystem->ResetSignal(kSigChild);
789 gSystem->ResetSignal(kSigBus);
790 gSystem->ResetSignal(kSigSegmentationViolation);
791 gSystem->ResetSignal(kSigIllegalInstruction);
792 gSystem->ResetSignal(kSigSystem);
793 gSystem->ResetSignal(kSigPipe);
794 gSystem->ResetSignal(kSigAlarm);
795 gSystem->ResetSignal(kSigUrgent);
796 gSystem->ResetSignal(kSigFloatingException);
797 gSystem->ResetSignal(kSigWindowChanged);
798 }
else if (
pset.getUntrackedParameter<
bool>(
"AbortOnSignal")) {
803 gSystem->ResetSignal(kSigBus);
804 gSystem->ResetSignal(kSigSegmentationViolation);
805 gSystem->ResetSignal(kSigIllegalInstruction);
806 gSystem->ResetSignal(kSigFloatingException);
819 signal(SIGABRT, SIG_DFL);
831 SetErrorHandler(RootErrorHandler);
836 gInterpreter->SetClassAutoloading(1);
840 TTree::SetMaxTreeSize(kMaxLong64);
841 TH1::AddDirectory(kFALSE);
858 bool imt =
pset.getUntrackedParameter<
bool>(
"EnableIMT");
859 if (imt && not ROOT::IsImplicitMTEnabled()) {
862 ROOT::EnableImplicitMT(
863 oneapi::tbb::global_control::active_value(oneapi::tbb::global_control::max_allowed_parallelism));
869 TIter iter(gROOT->GetListOfFiles());
870 TObject*
obj =
nullptr;
871 while (
nullptr != (
obj = iter.Next())) {
872 TFile*
f =
dynamic_cast<TFile*
>(
obj);
877 iter = TIter(gROOT->GetListOfFiles());
886 ROOT::EnableThreadSafety();
889 TObject::SetObjectStat(
false);
892 TVirtualStreamerInfo::Optimize(
false);
897 desc.setComment(
"Centralized interface to ROOT.");
898 desc.addUntracked<
bool>(
"UnloadRootSigHandler",
false)
899 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
900 desc.addUntracked<
bool>(
"ResetRootErrHandler",
true)
902 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
903 desc.addUntracked<
bool>(
"AutoLibraryLoader",
true)
904 ->setComment(
"If True, enables automatic loading of data dictionaries.");
905 desc.addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
906 desc.addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
907 desc.addUntracked<
bool>(
"AbortOnSignal",
true)
909 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which " 910 "attempts to do a clean shutdown.");
911 desc.addUntracked<
bool>(
"InteractiveDebug",
false)
913 "If True, leave gdb attached to cmsRun after a crash; " 914 "if False, attach gdb, print a stack trace, and quit gdb");
915 desc.addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
916 desc.addUntracked<
int>(
"StackTracePauseTime", 300)
917 ->setComment(
"Seconds to pause other threads during stack trace.");
918 descriptions.
add(
"InitRootHandlers",
desc);
940 "set pagination no\n" 941 "thread apply all bt\n" 943 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
946 std::ostringstream sstr;
947 sstr <<
"Unable to pre-allocate stacktrace handler information";
965 std::ostringstream sstr;
966 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
976 std::ostringstream sstr;
977 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
void on_scheduler_exit(bool) override
static constexpr char dashC[]
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
bool loadAllDictionaries_
void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static void stacktraceFromThread()
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
Container_type threadIDs_
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
oneapi::tbb::concurrent_unordered_set< pthread_t > Container_type
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
Log< level::Error, false > LogError
std::shared_ptr< const void > sigFpeHandler_
friend int cmssw_stacktrace(void *)
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
std::shared_ptr< const void > sigBusHandler_
~ThreadTracker() override=default
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static char const *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static std::unique_ptr< ThreadTracker > threadTracker_
Log< level::Info, false > LogInfo
static constexpr int pidStringLength_
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
static char const *const pstackArgv_[]
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
unsigned int maxNumberOfThreads() const
std::string moduleName(StableProvenance const &provenance, ProcessHistory const &history)
void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
~InitRootHandlers() override
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
Log< level::Warning, false > LogWarning
static constexpr char pstackName[]
void on_scheduler_entry(bool) override