Go to the documentation of this file.00001
00003
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008
00009 #include <boost/bind.hpp>
00010
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015
00016
00017 namespace stor {
00018
00019 RunMonitorCollection::RunMonitorCollection
00020 (
00021 const utils::Duration_t& updateInterval,
00022 AlarmHandlerPtr ah,
00023 SharedResourcesPtr sr
00024 ) :
00025 MonitorCollection(updateInterval),
00026 eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027 errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028 unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00029 runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00030 lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031 eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00032 alarmHandler_(ah),
00033 sharedResources_(sr)
00034 {}
00035
00036
00037 void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00038 {
00039 alarmParams_ = alarmParams;
00040 }
00041
00042
00043 void RunMonitorCollection::do_calculateStatistics()
00044 {
00045 eventIDsReceived_.calculateStatistics();
00046 errorEventIDsReceived_.calculateStatistics();
00047 unwantedEventIDsReceived_.calculateStatistics();
00048 runNumbersSeen_.calculateStatistics();
00049 lumiSectionsSeen_.calculateStatistics();
00050 eolsSeen_.calculateStatistics();
00051
00052 checkForBadEvents();
00053 }
00054
00055
00056 void RunMonitorCollection::do_reset()
00057 {
00058 eventIDsReceived_.reset();
00059 errorEventIDsReceived_.reset();
00060 unwantedEventIDsReceived_.reset();
00061 runNumbersSeen_.reset();
00062 lumiSectionsSeen_.reset();
00063 eolsSeen_.reset();
00064
00065 unwantedEventsMap_.clear();
00066 }
00067
00068
00069 void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00070 {
00071 infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00072 infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00073 infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00074 infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00075 }
00076
00077
00078 void RunMonitorCollection::do_updateInfoSpaceItems()
00079 {
00080 MonitoredQuantity::Stats runNumberStats;
00081 runNumbersSeen_.getStats(runNumberStats);
00082 runNumber_ = static_cast<xdata::UnsignedInteger32>(
00083 static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00084
00085 MonitoredQuantity::Stats eventIDsReceivedStats;
00086 eventIDsReceived_.getStats(eventIDsReceivedStats);
00087 dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00088 static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00089
00090 MonitoredQuantity::Stats errorEventIDsReceivedStats;
00091 errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00092 errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00093 static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00094
00095 MonitoredQuantity::Stats unwantedEventStats;
00096 unwantedEventIDsReceived_.getStats(unwantedEventStats);
00097 unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00098 static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00099 }
00100
00101
00102 void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00103 {
00104 if ( ioc.faulty() || !ioc.complete() ) return;
00105
00106 unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00107
00108 uint32_t outputModuleId = ioc.outputModuleId();
00109
00110 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00111
00112 UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00113
00114 if(pos != unwantedEventsMap_.end() &&
00115 !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00116 {
00117
00118 ++(pos->second.count);
00119 }
00120 else
00121 {
00122 UnwantedEvent newEvent(ioc);
00123 unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00124 }
00125 }
00126
00127
00128 void RunMonitorCollection::checkForBadEvents()
00129 {
00130 alarmErrorEvents();
00131
00132 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00133 std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00134 boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00135 }
00136
00137
00138 void RunMonitorCollection::alarmErrorEvents()
00139 {
00140 if ( ! alarmParams_.isProductionSystem_ ) return;
00141
00142 const std::string alarmName("ErrorEvents");
00143
00144 MonitoredQuantity::Stats stats;
00145 errorEventIDsReceived_.getStats(stats);
00146 long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00147
00148 if ( count >= alarmParams_.errorEvents_ )
00149 {
00150 std::ostringstream msg;
00151 msg << "Received " << count << " error events in the last "
00152 << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00153 XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00154 alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00155 }
00156 else
00157 {
00158 alarmHandler_->revokeAlarm( alarmName );
00159 }
00160 }
00161
00162
00163 void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00164 {
00165 if ( ! alarmParams_.isProductionSystem_ ) return;
00166
00167 if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00168 {
00169 std::ostringstream msg;
00170 msg << "Received " << val.second.count << " events"
00171 << " not tagged for any stream or consumer."
00172 << " Output module " <<
00173 sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00174 << " (id " << val.first << ")"
00175 << " HLT trigger bits: ";
00176
00177
00178 int byteIndex = 0;
00179 int subIndex = 0;
00180 for (unsigned int pathIndex = 0;
00181 pathIndex < val.second.hltTriggerCount;
00182 ++pathIndex)
00183 {
00184 int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00185 state &= 0x3;
00186 msg << state << " ";
00187 ++subIndex;
00188 if (subIndex == 4)
00189 { ++byteIndex;
00190 subIndex = 0;
00191 }
00192 }
00193
00194 XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00195 alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00196
00197 val.second.previousCount = val.second.count;
00198 }
00199 else if (val.second.count == val.second.previousCount)
00200
00201 {
00202 alarmHandler_->revokeAlarm( val.second.alarmName );
00203 }
00204 }
00205
00206
00207 RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00208 : count(1), previousCount(0)
00209 {
00210 std::ostringstream str;
00211 str << "UnwantedEvent_" << nextId++;
00212 alarmName = str.str();
00213 hltTriggerCount = ioc.hltTriggerCount();
00214 ioc.hltTriggerBits(bitList);
00215 }
00216
00217 uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00218
00219 }
00220