CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_7/src/EventFilter/StorageManager/src/AlarmHandler.cc

Go to the documentation of this file.
00001 //$Id: AlarmHandler.cc,v 1.12 2011/11/08 10:48:40 mommsen Exp $
00003 
00004 
00005 #include "sentinel/utils/version.h"
00006 #include "sentinel/utils/Alarm.h"
00007 
00008 #include "xcept/tools.h"
00009 #include "xdata/InfoSpaceFactory.h"
00010 
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/CommandQueue.h"
00013 #include "EventFilter/StorageManager/interface/SharedResources.h"
00014 #include "EventFilter/StorageManager/interface/StateMachine.h"
00015 #include "EventFilter/StorageManager/interface/StatisticsReporter.h"
00016 
00017 
00018 namespace stor {
00019 
00020   AlarmHandler::AlarmHandler
00021   (
00022     xdaq::Application* app
00023   ) :
00024   app_(app)
00025   {
00026     try
00027     {
00028       alarmInfoSpace_ = xdata::getInfoSpaceFactory()->get("urn:xdaq-sentinel:alarms");
00029     }
00030     catch(xdata::exception::Exception)
00031     {
00032       // sentinel is not available
00033       alarmInfoSpace_ = 0;
00034     }
00035   }
00036   
00037 
00038   AlarmHandler::AlarmHandler
00039   (
00040     xdaq::Application* app,
00041     SharedResourcesPtr sr
00042   ) :
00043   app_(app),
00044   sharedResources_(sr)
00045   {
00046     try
00047     {
00048       alarmInfoSpace_ = xdata::getInfoSpaceFactory()->get("urn:xdaq-sentinel:alarms");
00049     }
00050     catch(xdata::exception::Exception)
00051     {
00052       // sentinel is not available
00053       alarmInfoSpace_ = 0;
00054     }
00055   }
00056   
00057   
00058   void AlarmHandler::raiseAlarm
00059   (
00060     const std::string name,
00061     const ALARM_LEVEL level,
00062     xcept::Exception& exception
00063   )
00064   {
00065     
00066     switch( level )
00067     {
00068       case OKAY:
00069         revokeAlarm(name);
00070         break;
00071         
00072       case WARNING:
00073         if ( raiseAlarm(name, "warning", exception) )
00074           LOG4CPLUS_WARN(app_->getApplicationLogger(),
00075             "Raising warning alarm " << name << ": " << exception.message());
00076         break;
00077         
00078       case ERROR:
00079         if ( raiseAlarm(name, "error", exception) )
00080           LOG4CPLUS_ERROR(app_->getApplicationLogger(),
00081             "Raising error alarm " << name << ": " << exception.message());
00082         break;
00083         
00084       case FATAL:
00085         if ( raiseAlarm(name, "fatal", exception) )
00086           LOG4CPLUS_FATAL(app_->getApplicationLogger(),
00087             "Raising fatal alarm " << name << ": " << exception.message());
00088         break;
00089         
00090       default:
00091         LOG4CPLUS_WARN(app_->getApplicationLogger(),
00092           "Unknown alarm level received for " << name << ": " << exception.message());
00093     }
00094   }
00095   
00096   
00097   void AlarmHandler::notifySentinel
00098   (
00099     const ALARM_LEVEL level,
00100     xcept::Exception& exception
00101   )
00102   {
00103     
00104     switch( level )
00105     {
00106       case OKAY:
00107         LOG4CPLUS_INFO(app_->getApplicationLogger(),
00108           xcept::stdformat_exception_history(exception));
00109         break;
00110         
00111       case WARNING:
00112         LOG4CPLUS_WARN(app_->getApplicationLogger(),
00113           xcept::stdformat_exception_history(exception));
00114         app_->notifyQualified("warning", exception);
00115         break;
00116         
00117       case ERROR:
00118         LOG4CPLUS_ERROR(app_->getApplicationLogger(),
00119           xcept::stdformat_exception_history(exception));
00120         app_->notifyQualified("error", exception);
00121         break;
00122         
00123       case FATAL:
00124         LOG4CPLUS_FATAL(app_->getApplicationLogger(),
00125           xcept::stdformat_exception_history(exception));
00126         app_->notifyQualified("fatal", exception);
00127         break;
00128         
00129       default:
00130         LOG4CPLUS_WARN(app_->getApplicationLogger(),
00131           "Unknown alarm level received for exception: " <<
00132           xcept::stdformat_exception_history(exception));
00133     }
00134   }
00135   
00136   
00137   bool AlarmHandler::raiseAlarm
00138   (
00139     const std::string name,
00140     const std::string level,
00141     xcept::Exception& exception
00142   )
00143   {
00144     
00145     if (!alarmInfoSpace_) return false;
00146     
00147     boost::mutex::scoped_lock sl( mutex_ );
00148     
00149     sentinel::utils::Alarm *alarm =
00150       new sentinel::utils::Alarm(level, exception, app_);
00151     try
00152     {
00153       alarmInfoSpace_->fireItemAvailable(name, alarm);
00154     }
00155     catch(xdata::exception::Exception)
00156     {
00157       // Alarm is already set or sentinel not available
00158       return false;
00159     }
00160     return true;
00161   }
00162   
00163   
00164   void AlarmHandler::revokeAlarm
00165   (
00166     const std::string name
00167   )
00168   {
00169     if (!alarmInfoSpace_) return;
00170     
00171     boost::mutex::scoped_lock sl( mutex_ );
00172     
00173     sentinel::utils::Alarm *alarm;
00174     try
00175     {
00176       alarm = dynamic_cast<sentinel::utils::Alarm*>( alarmInfoSpace_->find( name ) );
00177     }
00178     catch(xdata::exception::Exception)
00179     {
00180       // Alarm has not been set or sentinel not available
00181       return;
00182     }
00183     
00184     LOG4CPLUS_INFO(app_->getApplicationLogger(), "Revoking alarm " << name);
00185     
00186     alarmInfoSpace_->fireItemRevoked(name, app_);
00187     delete alarm;
00188   }
00189   
00190   
00191   void AlarmHandler::clearAllAlarms()
00192   {
00193     if (!alarmInfoSpace_) return;
00194     
00195     boost::mutex::scoped_lock sl( mutex_ );
00196     
00197     typedef std::map<std::string, xdata::Serializable*, std::less<std::string> > alarmList;
00198     alarmList alarms = alarmInfoSpace_->match(".*");
00199     for (alarmList::const_iterator it = alarms.begin(), itEnd = alarms.end();
00200          it != itEnd; ++it)
00201     {
00202       sentinel::utils::Alarm* alarm = dynamic_cast<sentinel::utils::Alarm*>(it->second);
00203       alarmInfoSpace_->fireItemRevoked(it->first, app_);
00204       delete alarm;
00205     }
00206   }
00207   
00208   
00209   void AlarmHandler::moveToFailedState( xcept::Exception& exception )
00210   {
00211     std::string errorMsg = "Failed to process FAIL exception: "
00212       + xcept::stdformat_exception_history(exception) + " due to ";
00213 
00214     try
00215     {
00216       notifySentinel(AlarmHandler::FATAL, exception);
00217       sharedResources_->statisticsReporter_->getStateMachineMonitorCollection().setStatusMessage( 
00218         xcept::stdformat_exception_history(exception)
00219       );
00220       EventPtr_t stMachEvent( new Fail() );
00221       // wait maximum 5 seconds until enqueuing succeeds
00222       if ( ! sharedResources_->commandQueue_->enqTimedWait( stMachEvent, boost::posix_time::seconds(5) ) )
00223       {
00224         XCEPT_DECLARE_NESTED( stor::exception::StateTransition,
00225           sentinelException, "Failed to enqueue FAIL event", exception );
00226         notifySentinel(AlarmHandler::FATAL, sentinelException);
00227       }
00228     }
00229     catch(xcept::Exception &e)
00230     {
00231       errorMsg += xcept::stdformat_exception_history(e);
00232       localDebug( errorMsg );
00233     }
00234     catch(std::exception &e)
00235     {
00236       errorMsg += e.what();
00237       localDebug( errorMsg );
00238     }
00239     catch( ... )
00240     {
00241       errorMsg += "an unknown exception.";
00242       localDebug( errorMsg );
00243     }
00244   }
00245  
00246 
00247   void AlarmHandler::localDebug( const std::string& message ) const
00248   {
00249     std::ostringstream fname_oss;
00250     fname_oss << "/tmp/storage_manager_debug_" << 
00251       sharedResources_->configuration_->getDiskWritingParams().smInstanceString_ <<
00252       "_" << getpid();
00253     const std::string fname = fname_oss.str();
00254     std::ofstream f( fname.c_str(), std::ios_base::ate | std::ios_base::out | std::ios_base::app );
00255     if( f.is_open() )
00256     {
00257       try
00258       {
00259         f << message << std::endl;
00260         f.close();
00261       }
00262       catch(...)
00263       {}
00264     }
00265   }
00266   
00267 } // namespace stor
00268 
00269