19 #include "oneapi/tbb/concurrent_unordered_set.h" 20 #include "oneapi/tbb/task.h" 21 #include "oneapi/tbb/task_scheduler_observer.h" 22 #include "oneapi/tbb/global_control.h" 46 #include "TInterpreter.h" 49 #include "TUnixSystem.h" 51 #include "TVirtualStreamerInfo.h" 53 #include "TClassTable.h" 60 constexpr std::size_t moduleBufferSize = 128;
// Compile-time switch read by the error handler's severity check
// (`s_ignoreEverything || el_severity <= s_ignoreWarnings`). It is fixed to
// false, so that check depends only on the severity comparison.
constexpr bool s_ignoreEverything{false};
/// Reports whether `search` contains at least one of the C strings in `substrs`.
///
/// @tparam SIZE   number of slots in the pattern table
/// @param search  text to scan
/// @param substrs table of NUL-terminated patterns; null slots are skipped
/// @return true as soon as one non-null pattern occurs anywhere in `search`
///
/// A std::array declared with more slots than initializers value-initializes
/// the remaining slots to nullptr, and std::string::find(const char*) has
/// undefined behavior for a null argument, so null slots are filtered out
/// before the search instead of being passed to find().
template <std::size_t SIZE>
bool find_if_string(const std::string& search, const std::array<const char* const, SIZE>& substrs) {
  return std::any_of(substrs.begin(), substrs.end(), [&search](const char* const s) -> bool {
    return s != nullptr && search.find(s) != std::string::npos;
  });
}
// Substrings looked for in the text of a ROOT message: RootErrorHandlerImpl
// passes this table to find_if_string(el_message, in_message).
// Keep the declared size in step with the number of entries below.
constexpr std::array<const char* const, 9> in_message{{
    "no dictionary for class",
    "already in TClassTable",
    "matrix not positive definite",
    "not a TStreamerInfo object",
    "Problems declaring payload",
    "Announced number of args different from the real number of argument passed",
    "nbins is <=0 - set to nbins = 1",
    "nbinsy is <=0 - set to nbinsy = 1",
    "oneapi::tbb::global_control is limiting",
}};
181 constexpr std::array<const char* const, 7> in_location{{
"Fit",
182 "TDecompChol::Solve",
183 "THistPainter::PaintInit",
184 "TUnixSystem::SetDisplay",
185 "TGClient::GetFontByName",
187 "RTaskArenaWrapper"}};
// Substrings looked for in the text of a ROOT message: RootErrorHandlerImpl
// passes this table to find_if_string(el_message, in_message_print_error)
// next to its `alreadyPrinted` bookkeeping.
// Keep the declared size in step with the number of entries below.
constexpr std::array<const char* const, 4> in_message_print_error{{
    "number of iterations was insufficient",
    "bad integrand behavior",
    "integral is divergent, or slowly convergent",
    "VariableMetricBuilder Initial matrix not pos.def.",
}};
195 void RootErrorHandlerImpl(
int level,
char const*
location,
char const* message) {
202 if (
level >= kFatal) {
204 }
else if (
level >= kSysError) {
212 if (s_ignoreEverything || el_severity <= s_ignoreWarnings) {
224 if (message !=
nullptr)
225 el_message = message;
236 size_t index1 = el_message.find(precursor);
237 if (index1 != std::string::npos) {
238 size_t index2 = index1 + precursor.length();
239 size_t index3 = el_message.find_first_of(
" :", index2);
240 if (index3 != std::string::npos) {
241 size_t substrlen = index3 - index2;
242 el_identifier +=
"-";
243 el_identifier += el_message.substr(index2, substrlen);
246 index1 = el_location.find(
"::");
247 if (index1 != std::string::npos) {
248 el_identifier +=
"/";
249 el_identifier += el_location.substr(0, index1);
255 if ((el_location.find(
"TBranchElement::Fill") != std::string::npos) &&
256 (el_message.find(
"fill branch") != std::string::npos) && (el_message.find(
"address") != std::string::npos) &&
257 (el_message.find(
"not set") != std::string::npos)) {
261 if ((el_message.find(
"Tree branches") != std::string::npos) &&
262 (el_message.find(
"different numbers of entries") != std::string::npos)) {
268 if (find_if_string(el_message, in_message) || find_if_string(el_location, in_location) ||
269 (
level <
kError and (el_location.find(
"CINTTypedefBuilder::Setup") != std::string::npos) and
270 (el_message.find(
"possible entries are in use!") != std::string::npos))) {
276 bool alreadyPrinted =
false;
277 if (find_if_string(el_message, in_message_print_error)) {
280 alreadyPrinted =
true;
295 if (die && (el_location !=
std::string(
"@SUB=TUnixSystem::DispatchSignals"))) {
296 std::ostringstream sstr;
297 sstr <<
"Fatal Root Error: " << el_location <<
"\n" << el_message <<
'\n';
300 except.clearMessage();
307 if (!alreadyPrinted) {
317 edm::LogInfo(
"Root_Information") << el_location << el_message;
322 void RootErrorHandler(
int level,
bool,
char const*
location,
char const* message) {
327 void set_default_signals() {
328 signal(SIGILL, SIG_DFL);
329 signal(SIGSEGV, SIG_DFL);
330 signal(SIGBUS, SIG_DFL);
332 signal(SIGFPE, SIG_DFL);
333 signal(SIGABRT, SIG_DFL);
336 static int full_write(
int fd,
const char*
text) {
343 if (errno == EINTR) {
355 static int full_read(
int fd,
char* inbuf,
size_t len,
int timeout_s = -1) {
358 ssize_t complete = 0;
359 std::chrono::time_point<std::chrono::steady_clock> end_time =
364 }
else if ((-1 == (
flags = fcntl(
fd, F_GETFL)))) {
373 if (timeout_s >= 0) {
374 struct pollfd poll_info {
379 if (ms_remaining > 0) {
380 int rc = poll(&poll_info, 1, ms_remaining);
383 if (errno == EINTR || errno == EAGAIN) {
395 }
else if (ms_remaining < 0) {
403 if (complete == -1) {
404 if (errno == EINTR) {
406 }
else if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
409 int orig_errno = errno;
425 static int full_cerr_write(
const char*
text) {
return full_write(2,
text); }
431 #if defined(SIGRTMAX) 432 #define PAUSE_SIGNAL SIGRTMAX 433 #define RESUME_SIGNAL SIGRTMAX - 1 434 #elif defined(SIGINFO) // macOS/BSD 435 #define PAUSE_SIGNAL SIGINFO 436 #define RESUME_SIGNAL SIGALRM 440 void sig_resume_handler(
int sig, siginfo_t*,
void*) {}
443 void sig_pause_for_stacktrace(
int sig, siginfo_t*,
void*) {
448 sigemptyset(&sigset);
449 sigaddset(&sigset, RESUME_SIGNAL);
450 pthread_sigmask(SIG_UNBLOCK, &sigset,
nullptr);
460 strlcpy(buff,
"\nModule: ", moduleBufferSize);
465 strlcat(buff,
":", moduleBufferSize);
470 strlcat(buff,
"none", moduleBufferSize);
477 void sig_dostack_then_abort(
int sig, siginfo_t*,
void*) {
482 const auto self = pthread_self();
486 struct sigaction act;
487 act.sa_sigaction = sig_pause_for_stacktrace;
489 sigemptyset(&act.sa_mask);
490 sigaction(PAUSE_SIGNAL, &act,
nullptr);
493 sigset_t pausesigset;
494 sigemptyset(&pausesigset);
495 sigaddset(&pausesigset, PAUSE_SIGNAL);
496 sigprocmask(SIG_UNBLOCK, &pausesigset,
nullptr);
499 for (
auto id : tids) {
501 pthread_kill(
id, PAUSE_SIGNAL);
507 act.sa_sigaction = sig_resume_handler;
508 sigaction(RESUME_SIGNAL, &act,
nullptr);
513 const char* signalname =
"unknown";
516 signalname =
"bus error";
520 signalname =
"segmentation violation";
524 signalname =
"illegal instruction";
528 signalname =
"floating point exception";
532 signalname =
"external termination request";
536 signalname =
"abort signal";
542 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
543 full_cerr_write(signalname);
544 full_cerr_write(
"\nThe following is the call stack containing the origin of the signal.\n\n");
553 std::size_t notified = 0;
555 for (
auto id : tids) {
557 if (pthread_kill(
id, RESUME_SIGNAL) == 0)
564 full_cerr_write(
"\nCurrent Modules:\n");
571 if (tids.count(
self) > 0) {
572 char buff[moduleBufferSize] =
"\nModule: ";
577 strlcat(buff,
":", moduleBufferSize);
582 strlcat(buff,
"none", moduleBufferSize);
584 strlcat(buff,
" (crashed)", moduleBufferSize);
585 full_cerr_write(buff);
587 full_cerr_write(
"\nModule: non-CMSSW (crashed)");
595 timespec
t = {0, 1000};
597 nanosleep(&
t,
nullptr);
605 full_cerr_write(
"\n\nA fatal system signal has occurred: ");
606 full_cerr_write(signalname);
607 full_cerr_write(
"\n");
611 if ((sig == SIGILL) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig ==
SIGTERM) || (sig == SIGFPE) ||
613 signal(sig, SIG_DFL);
616 set_default_signals();
621 void sig_abort(
int sig, siginfo_t*,
void*) {
622 full_cerr_write(
"\n\nFatal system signal has occurred during exit\n");
625 signal(sig, SIG_DFL);
629 set_default_signals();
654 int result = full_read(fromParent,
buf, 1);
659 set_default_signals();
661 full_cerr_write(
"\n\nTraceback helper thread failed to read from parent: ");
662 full_cerr_write(strerror(-
result));
663 full_cerr_write(
"\n");
667 set_default_signals();
669 full_write(toParent,
buf);
670 }
else if (
buf[0] ==
'2') {
677 }
else if (
buf[0] ==
'3') {
680 set_default_signals();
682 full_cerr_write(
"\n\nTraceback helper thread got unknown command from parent: ");
683 full_cerr_write(
buf);
684 full_cerr_write(
"\n");
693 full_cerr_write(
"\n\nAttempt to request stacktrace failed: ");
694 full_cerr_write(strerror(-
result));
695 full_cerr_write(
"\n");
701 full_cerr_write(
"\n\nWaiting for stacktrace completion failed: ");
702 if (
result == -ETIMEDOUT) {
703 full_cerr_write(
"timed out waiting for GDB to complete.");
705 full_cerr_write(strerror(-
result));
707 full_cerr_write(
"\n");
713 char child_stack[4 * 1024];
714 char* child_stack_ptr = child_stack + 4 * 1024;
724 if (child_stack_ptr) {
731 full_cerr_write(
"(Attempt to perform stack dump failed.)\n");
734 if (waitpid(pid, &
status, 0) == -1) {
735 full_cerr_write(
"(Failed to wait on stack dump output.)\n");
738 full_cerr_write(
"(GDB stack trace failed unexpectedly)\n");
744 set_default_signals();
751 syscall(SYS_execve,
"/bin/sh",
argv, __environ);
753 execv(
"/bin/sh",
argv);
773 unloadSigHandler_(
pset.getUntrackedParameter<
bool>(
"UnloadRootSigHandler")),
774 resetErrHandler_(
pset.getUntrackedParameter<
bool>(
"ResetRootErrHandler")),
775 loadAllDictionaries_(
pset.getUntrackedParameter<
bool>(
"LoadAllDictionaries")),
776 autoLibraryLoader_(loadAllDictionaries_
or pset.getUntrackedParameter<
bool>(
"AutoLibraryLoader")),
777 autoClassParser_(
pset.getUntrackedParameter<
bool>(
"AutoClassParser")),
778 interactiveDebug_(
pset.getUntrackedParameter<
bool>(
"InteractiveDebug")) {
792 gSystem->ResetSignal(kSigChild);
793 gSystem->ResetSignal(kSigBus);
794 gSystem->ResetSignal(kSigSegmentationViolation);
795 gSystem->ResetSignal(kSigIllegalInstruction);
796 gSystem->ResetSignal(kSigSystem);
797 gSystem->ResetSignal(kSigPipe);
798 gSystem->ResetSignal(kSigAlarm);
799 gSystem->ResetSignal(kSigUrgent);
800 gSystem->ResetSignal(kSigFloatingException);
801 gSystem->ResetSignal(kSigWindowChanged);
802 }
else if (
pset.getUntrackedParameter<
bool>(
"AbortOnSignal")) {
807 gSystem->ResetSignal(kSigBus);
808 gSystem->ResetSignal(kSigSegmentationViolation);
809 gSystem->ResetSignal(kSigIllegalInstruction);
810 gSystem->ResetSignal(kSigFloatingException);
823 signal(SIGABRT, SIG_DFL);
835 SetErrorHandler(RootErrorHandler);
840 gInterpreter->SetClassAutoloading(1);
853 TTree::SetMaxTreeSize(kMaxLong64);
854 TH1::AddDirectory(kFALSE);
871 bool imt =
pset.getUntrackedParameter<
bool>(
"EnableIMT");
872 if (imt && not ROOT::IsImplicitMTEnabled()) {
875 ROOT::EnableImplicitMT(
876 oneapi::tbb::global_control::active_value(oneapi::tbb::global_control::max_allowed_parallelism));
882 TIter iter(gROOT->GetListOfFiles());
883 TObject*
obj =
nullptr;
884 while (
nullptr != (
obj = iter.Next())) {
885 TFile*
f =
dynamic_cast<TFile*
>(
obj);
890 iter = TIter(gROOT->GetListOfFiles());
899 ROOT::EnableThreadSafety();
902 TObject::SetObjectStat(
false);
905 TVirtualStreamerInfo::Optimize(
false);
910 desc.setComment(
"Centralized interface to ROOT.");
911 desc.addUntracked<
bool>(
"UnloadRootSigHandler",
false)
912 ->setComment(
"If True, signals are handled by this service, rather than by ROOT.");
913 desc.addUntracked<
bool>(
"ResetRootErrHandler",
true)
915 "If True, ROOT messages (e.g. errors, warnings) are handled by this service, rather than by ROOT.");
916 desc.addUntracked<
bool>(
"AutoLibraryLoader",
true)
917 ->setComment(
"If True, enables automatic loading of data dictionaries.");
918 desc.addUntracked<
bool>(
"AutoClassParser",
true)
920 "If False, the automatic parsing of class headers for dictionaries when pre-built dictionaries are " 921 "missing is disable during module construction. The current implementation of disabling the parsing is " 922 "fragile, and may work only in a single-thread job that does not use reco::parser::cutParser() or " 923 "reco::parser::expressionParser() (and it certainly does not work on multiple threads).");
924 desc.addUntracked<
bool>(
"LoadAllDictionaries",
false)->setComment(
"If True, loads all ROOT dictionaries.");
925 desc.addUntracked<
bool>(
"EnableIMT",
true)->setComment(
"If True, calls ROOT::EnableImplicitMT().");
926 desc.addUntracked<
bool>(
"AbortOnSignal",
true)
928 "If True, do an abort when a signal occurs that causes a crash. If False, ROOT will do an exit which " 929 "attempts to do a clean shutdown.");
930 desc.addUntracked<
bool>(
"InteractiveDebug",
false)
932 "If True, leave gdb attached to cmsRun after a crash; " 933 "if False, attach gdb, print a stack trace, and quit gdb");
934 desc.addUntracked<
int>(
"DebugLevel", 0)->setComment(
"Sets ROOT's gDebug value.");
935 desc.addUntracked<
int>(
"StackTracePauseTime", 300)
936 ->setComment(
"Seconds to pause other threads during stack trace.");
937 descriptions.
add(
"InitRootHandlers",
desc);
959 "set pagination no\n" 960 "thread apply all bt\n" 962 "/bin/sed -n -e 's/^\\((gdb) \\)*//' -e '/^#/p' -e '/^Thread/p'";
965 std::ostringstream sstr;
966 sstr <<
"Unable to pre-allocate stacktrace handler information";
984 std::ostringstream sstr;
985 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
995 std::ostringstream sstr;
996 sstr <<
"Failed to create child-to-parent pipes (errno=" << errno <<
"): " << strerror(errno);
void on_scheduler_exit(bool) override
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
static constexpr char dashC[]
edm::serviceregistry::AllArgsMaker< edm::RootHandlers, InitRootHandlers > RootHandlersMaker
bool loadAllDictionaries_
void enableWarnings_() override
static void cmssw_stacktrace_fork()
#define DEFINE_FWK_SERVICE_MAKER(concrete, maker)
void watchPreallocate(Preallocate::slot_type const &iSlot)
void setRefCoreStreamerInTClass()
static void stacktraceFromThread()
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
Container_type threadIDs_
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
std::vector< T >::const_iterator search(const cond::Time_t &val, const std::vector< T > &container)
oneapi::tbb::concurrent_unordered_set< pthread_t > Container_type
bool isProcessWideService(TFileService const *)
static ModuleCallingContext const * getCurrentModuleOnThread()
static int stackTracePause_
void installCustomHandler(int signum, CFUNC func)
std::shared_ptr< const void > sigSegvHandler_
Log< level::Error, false > LogError
std::shared_ptr< const void > sigFpeHandler_
friend int cmssw_stacktrace(void *)
void ignoreWarnings_(edm::RootHandlers::SeverityLevel level) override
std::shared_ptr< const void > sigBusHandler_
~ThreadTracker() override=default
static TypeWithDict byName(std::string const &name)
static std::atomic< std::size_t > doneModules_
static const ThreadTracker::Container_type & threadIDs()
std::shared_ptr< const void > sigAbrtHandler_
static std::atomic< std::size_t > nextModule_
static char pidString_[pidStringLength_]
static char const *const * getPstackArgv()
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
std::shared_ptr< const void > sigIllHandler_
static int childToParent_[2]
std::shared_ptr< const void > sigTermHandler_
void addAdditionalInfo(std::string const &info)
int cmssw_stacktrace(void *)
static std::unique_ptr< std::thread > helperThread_
static std::vector< std::array< char, moduleBufferSize > > moduleListBuffers_
static std::unique_ptr< ThreadTracker > threadTracker_
Log< level::Info, false > LogInfo
static constexpr int pidStringLength_
InitRootHandlers(ParameterSet const &pset, ActivityRegistry &iReg)
static char const *const pstackArgv_[]
void add(std::string const &label, ParameterSetDescription const &psetDescription)
TEveGeoShape * clone(const TEveElement *element, TEveElement *parent)
static int parentToChild_[2]
unsigned int maxNumberOfThreads() const
std::string moduleName(StableProvenance const &provenance, ProcessHistory const &history)
void willBeUsingThreads() override
char data[epos_bytes_allocation]
static void fillDescriptions(ConfigurationDescriptions &descriptions)
~InitRootHandlers() override
const Container_type & IDs()
static int stackTracePause()
static void stacktraceHelperThread()
bool hasDictionary(std::type_info const &)
Log< level::Warning, false > LogWarning
static constexpr char pstackName[]
void on_scheduler_entry(bool) override