Go to the documentation of this file.00001
00003
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008
00009 #include <boost/bind.hpp>
00010
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015
00016
00017 namespace stor {
00018
00019 RunMonitorCollection::RunMonitorCollection
00020 (
00021 const utils::Duration_t& updateInterval,
00022 SharedResourcesPtr sr
00023 ) :
00024 MonitorCollection(updateInterval),
00025 eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00026 errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027 unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028 runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00029 lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00030 eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031 sharedResources_(sr)
00032 {}
00033
00034
00035 void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00036 {
00037 alarmParams_ = alarmParams;
00038 }
00039
00040
00041 void RunMonitorCollection::do_calculateStatistics()
00042 {
00043 eventIDsReceived_.calculateStatistics();
00044 errorEventIDsReceived_.calculateStatistics();
00045 unwantedEventIDsReceived_.calculateStatistics();
00046 runNumbersSeen_.calculateStatistics();
00047 lumiSectionsSeen_.calculateStatistics();
00048 eolsSeen_.calculateStatistics();
00049
00050 checkForBadEvents();
00051 }
00052
00053
00054 void RunMonitorCollection::do_reset()
00055 {
00056 eventIDsReceived_.reset();
00057 errorEventIDsReceived_.reset();
00058 unwantedEventIDsReceived_.reset();
00059 runNumbersSeen_.reset();
00060 lumiSectionsSeen_.reset();
00061 eolsSeen_.reset();
00062
00063 unwantedEventsMap_.clear();
00064 }
00065
00066
00067 void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00068 {
00069 infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00070 infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00071 infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00072 infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00073 }
00074
00075
00076 void RunMonitorCollection::do_updateInfoSpaceItems()
00077 {
00078 MonitoredQuantity::Stats runNumberStats;
00079 runNumbersSeen_.getStats(runNumberStats);
00080 runNumber_ = static_cast<xdata::UnsignedInteger32>(
00081 static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00082
00083 MonitoredQuantity::Stats eventIDsReceivedStats;
00084 eventIDsReceived_.getStats(eventIDsReceivedStats);
00085 dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00086 static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00087
00088 MonitoredQuantity::Stats errorEventIDsReceivedStats;
00089 errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00090 errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00091 static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00092
00093 MonitoredQuantity::Stats unwantedEventStats;
00094 unwantedEventIDsReceived_.getStats(unwantedEventStats);
00095 unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00096 static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00097 }
00098
00099
00100 void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00101 {
00102 if ( ! alarmParams_.careAboutUnwantedEvents_ ) return;
00103 if ( ioc.faulty() || !ioc.complete() ) return;
00104
00105 unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00106
00107 uint32_t outputModuleId = ioc.outputModuleId();
00108
00109 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00110
00111 UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00112
00113 if(pos != unwantedEventsMap_.end() &&
00114 !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00115 {
00116
00117 ++(pos->second.count);
00118 }
00119 else
00120 {
00121 UnwantedEvent newEvent(ioc);
00122 unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00123 }
00124 }
00125
00126
00127 void RunMonitorCollection::checkForBadEvents()
00128 {
00129 alarmErrorEvents();
00130
00131 boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00132 std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00133 boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00134 }
00135
00136
00137 void RunMonitorCollection::alarmErrorEvents()
00138 {
00139 if ( ! alarmParams_.isProductionSystem_ ) return;
00140
00141 const std::string alarmName("ErrorEvents");
00142
00143 MonitoredQuantity::Stats stats;
00144 errorEventIDsReceived_.getStats(stats);
00145 long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00146
00147 if ( count >= alarmParams_.errorEvents_ )
00148 {
00149 std::ostringstream msg;
00150 msg << "Received " << count << " error events in the last "
00151 << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00152 XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00153 sharedResources_->alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00154 }
00155 else
00156 {
00157 sharedResources_->alarmHandler_->revokeAlarm( alarmName );
00158 }
00159 }
00160
00161
00162 void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00163 {
00164 if ( ! alarmParams_.isProductionSystem_ ) return;
00165
00166 if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00167 {
00168 std::ostringstream msg;
00169 msg << "Received " << val.second.count << " events"
00170 << " not tagged for any stream or consumer."
00171 << " Output module " <<
00172 sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00173 << " (id " << val.first << ")"
00174 << " HLT trigger bits: ";
00175
00176
00177 int byteIndex = 0;
00178 int subIndex = 0;
00179 for (unsigned int pathIndex = 0;
00180 pathIndex < val.second.hltTriggerCount;
00181 ++pathIndex)
00182 {
00183 int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00184 state &= 0x3;
00185 msg << state << " ";
00186 ++subIndex;
00187 if (subIndex == 4)
00188 { ++byteIndex;
00189 subIndex = 0;
00190 }
00191 }
00192
00193 XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00194 sharedResources_->alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00195
00196 val.second.previousCount = val.second.count;
00197 }
00198 else if (val.second.count == val.second.previousCount)
00199
00200 {
00201 sharedResources_->alarmHandler_->revokeAlarm( val.second.alarmName );
00202 }
00203 }
00204
00205
00206 RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00207 : count(1), previousCount(0)
00208 {
00209 std::ostringstream str;
00210 str << "UnwantedEvent_" << nextId++;
00211 alarmName = str.str();
00212 hltTriggerCount = ioc.hltTriggerCount();
00213 ioc.hltTriggerBits(bitList);
00214 }
00215
00216 uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00217
00218 }
00219