Go to the documentation of this file.00001
00003
00004
00005 #include "sentinel/utils/version.h"
00006 #include "sentinel/utils/Alarm.h"
00007
00008 #include "xcept/tools.h"
00009 #include "xdata/InfoSpaceFactory.h"
00010
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/CommandQueue.h"
00013 #include "EventFilter/StorageManager/interface/SharedResources.h"
00014 #include "EventFilter/StorageManager/interface/StateMachine.h"
00015 #include "EventFilter/StorageManager/interface/StatisticsReporter.h"
00016
00017
00018 namespace stor {
00019
00020 AlarmHandler::AlarmHandler
00021 (
00022 xdaq::Application* app
00023 ) :
00024 app_(app)
00025 {
00026 try
00027 {
00028 alarmInfoSpace_ = xdata::getInfoSpaceFactory()->get("urn:xdaq-sentinel:alarms");
00029 }
00030 catch(xdata::exception::Exception)
00031 {
00032
00033 alarmInfoSpace_ = 0;
00034 }
00035 }
00036
00037
00038 AlarmHandler::AlarmHandler
00039 (
00040 xdaq::Application* app,
00041 SharedResourcesPtr sr
00042 ) :
00043 app_(app),
00044 sharedResources_(sr)
00045 {
00046 try
00047 {
00048 alarmInfoSpace_ = xdata::getInfoSpaceFactory()->get("urn:xdaq-sentinel:alarms");
00049 }
00050 catch(xdata::exception::Exception)
00051 {
00052
00053 alarmInfoSpace_ = 0;
00054 }
00055 }
00056
00057
00058 void AlarmHandler::raiseAlarm
00059 (
00060 const std::string name,
00061 const ALARM_LEVEL level,
00062 xcept::Exception& exception
00063 )
00064 {
00065
00066 switch( level )
00067 {
00068 case OKAY:
00069 revokeAlarm(name);
00070 break;
00071
00072 case WARNING:
00073 if ( raiseAlarm(name, "warning", exception) )
00074 LOG4CPLUS_WARN(app_->getApplicationLogger(),
00075 "Raising warning alarm " << name << ": " << exception.message());
00076 break;
00077
00078 case ERROR:
00079 if ( raiseAlarm(name, "error", exception) )
00080 LOG4CPLUS_ERROR(app_->getApplicationLogger(),
00081 "Raising error alarm " << name << ": " << exception.message());
00082 break;
00083
00084 case FATAL:
00085 if ( raiseAlarm(name, "fatal", exception) )
00086 LOG4CPLUS_FATAL(app_->getApplicationLogger(),
00087 "Raising fatal alarm " << name << ": " << exception.message());
00088 break;
00089
00090 default:
00091 LOG4CPLUS_WARN(app_->getApplicationLogger(),
00092 "Unknown alarm level received for " << name << ": " << exception.message());
00093 }
00094 }
00095
00096
00097 void AlarmHandler::notifySentinel
00098 (
00099 const ALARM_LEVEL level,
00100 xcept::Exception& exception
00101 )
00102 {
00103
00104 switch( level )
00105 {
00106 case OKAY:
00107 LOG4CPLUS_INFO(app_->getApplicationLogger(),
00108 xcept::stdformat_exception_history(exception));
00109 break;
00110
00111 case WARNING:
00112 LOG4CPLUS_WARN(app_->getApplicationLogger(),
00113 xcept::stdformat_exception_history(exception));
00114 app_->notifyQualified("warning", exception);
00115 break;
00116
00117 case ERROR:
00118 LOG4CPLUS_ERROR(app_->getApplicationLogger(),
00119 xcept::stdformat_exception_history(exception));
00120 app_->notifyQualified("error", exception);
00121 break;
00122
00123 case FATAL:
00124 LOG4CPLUS_FATAL(app_->getApplicationLogger(),
00125 xcept::stdformat_exception_history(exception));
00126 app_->notifyQualified("fatal", exception);
00127 break;
00128
00129 default:
00130 LOG4CPLUS_WARN(app_->getApplicationLogger(),
00131 "Unknown alarm level received for exception: " <<
00132 xcept::stdformat_exception_history(exception));
00133 }
00134 }
00135
00136
00137 bool AlarmHandler::raiseAlarm
00138 (
00139 const std::string name,
00140 const std::string level,
00141 xcept::Exception& exception
00142 )
00143 {
00144
00145 if (!alarmInfoSpace_) return false;
00146
00147 boost::mutex::scoped_lock sl( mutex_ );
00148
00149 sentinel::utils::Alarm *alarm =
00150 new sentinel::utils::Alarm(level, exception, app_);
00151 try
00152 {
00153 alarmInfoSpace_->fireItemAvailable(name, alarm);
00154 }
00155 catch(xdata::exception::Exception)
00156 {
00157
00158 return false;
00159 }
00160 return true;
00161 }
00162
00163
00164 void AlarmHandler::revokeAlarm
00165 (
00166 const std::string name
00167 )
00168 {
00169 if (!alarmInfoSpace_) return;
00170
00171 boost::mutex::scoped_lock sl( mutex_ );
00172
00173 sentinel::utils::Alarm *alarm;
00174 try
00175 {
00176 alarm = dynamic_cast<sentinel::utils::Alarm*>( alarmInfoSpace_->find( name ) );
00177 }
00178 catch(xdata::exception::Exception)
00179 {
00180
00181 return;
00182 }
00183
00184 LOG4CPLUS_INFO(app_->getApplicationLogger(), "Revoking alarm " << name);
00185
00186 alarmInfoSpace_->fireItemRevoked(name, app_);
00187 delete alarm;
00188 }
00189
00190
00191 void AlarmHandler::clearAllAlarms()
00192 {
00193 if (!alarmInfoSpace_) return;
00194
00195 boost::mutex::scoped_lock sl( mutex_ );
00196
00197 typedef std::map<std::string, xdata::Serializable*, std::less<std::string> > alarmList;
00198 alarmList alarms = alarmInfoSpace_->match(".*");
00199 for (alarmList::const_iterator it = alarms.begin(), itEnd = alarms.end();
00200 it != itEnd; ++it)
00201 {
00202 sentinel::utils::Alarm* alarm = dynamic_cast<sentinel::utils::Alarm*>(it->second);
00203 alarmInfoSpace_->fireItemRevoked(it->first, app_);
00204 delete alarm;
00205 }
00206 }
00207
00208
00209 void AlarmHandler::moveToFailedState( xcept::Exception& exception )
00210 {
00211 std::string errorMsg = "Failed to process FAIL exception: "
00212 + xcept::stdformat_exception_history(exception) + " due to ";
00213
00214 try
00215 {
00216 notifySentinel(AlarmHandler::FATAL, exception);
00217 sharedResources_->statisticsReporter_->getStateMachineMonitorCollection().setStatusMessage(
00218 xcept::stdformat_exception_history(exception)
00219 );
00220 EventPtr_t stMachEvent( new Fail() );
00221
00222 if ( ! sharedResources_->commandQueue_->enqTimedWait( stMachEvent, boost::posix_time::seconds(5) ) )
00223 {
00224 XCEPT_DECLARE_NESTED( stor::exception::StateTransition,
00225 sentinelException, "Failed to enqueue FAIL event", exception );
00226 notifySentinel(AlarmHandler::FATAL, sentinelException);
00227 }
00228 }
00229 catch(xcept::Exception &e)
00230 {
00231 errorMsg += xcept::stdformat_exception_history(e);
00232 localDebug( errorMsg );
00233 }
00234 catch(std::exception &e)
00235 {
00236 errorMsg += e.what();
00237 localDebug( errorMsg );
00238 }
00239 catch( ... )
00240 {
00241 errorMsg += "an unknown exception.";
00242 localDebug( errorMsg );
00243 }
00244 }
00245
00246
00247 void AlarmHandler::localDebug( const std::string& message ) const
00248 {
00249 std::ostringstream fname_oss;
00250 fname_oss << "/tmp/storage_manager_debug_" <<
00251 sharedResources_->configuration_->getDiskWritingParams().smInstanceString_ <<
00252 "_" << getpid();
00253 const std::string fname = fname_oss.str();
00254 std::ofstream f( fname.c_str(), std::ios_base::ate | std::ios_base::out | std::ios_base::app );
00255 if( f.is_open() )
00256 {
00257 try
00258 {
00259 f << message << std::endl;
00260 f.close();
00261 }
00262 catch(...)
00263 {}
00264 }
00265 }
00266
00267 }
00268
00269