Go to the documentation of this file.00001
00003
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008
00009 #include <boost/bind.hpp>
00010
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015
00016
00017 namespace stor {
00018
00019 RunMonitorCollection::RunMonitorCollection
00020 (
00021 const utils::Duration_t& updateInterval,
00022 SharedResourcesPtr sr
00023 ) :
00024 MonitorCollection(updateInterval),
00025 eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00026 errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027 unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028 runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00029 lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00030 eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031 sharedResources_(sr)
00032 {}
00033
00034
00035 void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00036 {
00037 alarmParams_ = alarmParams;
00038 }
00039
00040
00041 void RunMonitorCollection::do_calculateStatistics()
00042 {
00043 eventIDsReceived_.calculateStatistics();
00044 errorEventIDsReceived_.calculateStatistics();
00045 unwantedEventIDsReceived_.calculateStatistics();
00046 runNumbersSeen_.calculateStatistics();
00047 lumiSectionsSeen_.calculateStatistics();
00048 eolsSeen_.calculateStatistics();
00049
00050 checkForBadEvents();
00051 }
00052
00053
00054 void RunMonitorCollection::do_reset()
00055 {
00056 eventIDsReceived_.reset();
00057 errorEventIDsReceived_.reset();
00058 unwantedEventIDsReceived_.reset();
00059 runNumbersSeen_.reset();
00060 lumiSectionsSeen_.reset();
00061 eolsSeen_.reset();
00062
00063 unwantedEventsMap_.clear();
00064 }
00065
00066
00067 void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00068 {
00069 infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00070 infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00071 infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00072 infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00073 }
00074
00075
00076 void RunMonitorCollection::do_updateInfoSpaceItems()
00077 {
00078 MonitoredQuantity::Stats runNumberStats;
00079 runNumbersSeen_.getStats(runNumberStats);
00080 runNumber_ = static_cast<xdata::UnsignedInteger32>(
00081 static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00082
00083 MonitoredQuantity::Stats eventIDsReceivedStats;
00084 eventIDsReceived_.getStats(eventIDsReceivedStats);
00085 dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00086 static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00087
00088 MonitoredQuantity::Stats errorEventIDsReceivedStats;
00089 errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00090 errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00091 static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00092
00093 MonitoredQuantity::Stats unwantedEventStats;
00094 unwantedEventIDsReceived_.getStats(unwantedEventStats);
00095 unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00096 static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00097 }
00098
00099
00100 void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00101 {
00102 if ( ioc.faulty() || !ioc.complete() ) return;
00103
00104 unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00105
00106 uint32_t outputModuleId = ioc.outputModuleId();
00107
00108 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00109
00110 UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00111
00112 if(pos != unwantedEventsMap_.end() &&
00113 !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00114 {
00115
00116 ++(pos->second.count);
00117 }
00118 else
00119 {
00120 UnwantedEvent newEvent(ioc);
00121 unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00122 }
00123 }
00124
00125
00126 void RunMonitorCollection::checkForBadEvents()
00127 {
00128 alarmErrorEvents();
00129
00130 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00131 std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00132 boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00133 }
00134
00135
00136 void RunMonitorCollection::alarmErrorEvents()
00137 {
00138 if ( ! alarmParams_.isProductionSystem_ ) return;
00139
00140 const std::string alarmName("ErrorEvents");
00141
00142 MonitoredQuantity::Stats stats;
00143 errorEventIDsReceived_.getStats(stats);
00144 long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00145
00146 if ( count >= alarmParams_.errorEvents_ )
00147 {
00148 std::ostringstream msg;
00149 msg << "Received " << count << " error events in the last "
00150 << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00151 XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00152 sharedResources_->alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00153 }
00154 else
00155 {
00156 sharedResources_->alarmHandler_->revokeAlarm( alarmName );
00157 }
00158 }
00159
00160
00161 void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00162 {
00163 if ( ! alarmParams_.isProductionSystem_ ) return;
00164
00165 if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00166 {
00167 std::ostringstream msg;
00168 msg << "Received " << val.second.count << " events"
00169 << " not tagged for any stream or consumer."
00170 << " Output module " <<
00171 sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00172 << " (id " << val.first << ")"
00173 << " HLT trigger bits: ";
00174
00175
00176 int byteIndex = 0;
00177 int subIndex = 0;
00178 for (unsigned int pathIndex = 0;
00179 pathIndex < val.second.hltTriggerCount;
00180 ++pathIndex)
00181 {
00182 int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00183 state &= 0x3;
00184 msg << state << " ";
00185 ++subIndex;
00186 if (subIndex == 4)
00187 { ++byteIndex;
00188 subIndex = 0;
00189 }
00190 }
00191
00192 XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00193 sharedResources_->alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00194
00195 val.second.previousCount = val.second.count;
00196 }
00197 else if (val.second.count == val.second.previousCount)
00198
00199 {
00200 sharedResources_->alarmHandler_->revokeAlarm( val.second.alarmName );
00201 }
00202 }
00203
00204
00205 RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00206 : count(1), previousCount(0)
00207 {
00208 std::ostringstream str;
00209 str << "UnwantedEvent_" << nextId++;
00210 alarmName = str.str();
00211 hltTriggerCount = ioc.hltTriggerCount();
00212 ioc.hltTriggerBits(bitList);
00213 }
00214
00215 uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00216
00217 }
00218