CMS 3D CMS Logo

CMSSW_4_4_3_patch1/src/EventFilter/StorageManager/src/RunMonitorCollection.cc

Go to the documentation of this file.
00001 // $Id: RunMonitorCollection.cc,v 1.17 2011/11/08 10:48:41 mommsen Exp $
00003 
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008 
00009 #include <boost/bind.hpp>
00010 
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015 
00016 
00017 namespace stor {
00018   
00019   RunMonitorCollection::RunMonitorCollection
00020   (
00021     const utils::Duration_t& updateInterval,
00022     SharedResourcesPtr sr
00023   ) :
00024   MonitorCollection(updateInterval),
00025   eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00026   errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027   unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028   runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00029   lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00030   eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031   sharedResources_(sr)
00032   {}
00033   
00034   
00035   void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00036   {
00037     alarmParams_ = alarmParams;
00038   }
00039   
00040   
00041   void RunMonitorCollection::do_calculateStatistics()
00042   {
00043     eventIDsReceived_.calculateStatistics();
00044     errorEventIDsReceived_.calculateStatistics();
00045     unwantedEventIDsReceived_.calculateStatistics();
00046     runNumbersSeen_.calculateStatistics();
00047     lumiSectionsSeen_.calculateStatistics();
00048     eolsSeen_.calculateStatistics();
00049     
00050     checkForBadEvents();
00051   }
00052   
00053   
00054   void RunMonitorCollection::do_reset()
00055   {
00056     eventIDsReceived_.reset();
00057     errorEventIDsReceived_.reset();
00058     unwantedEventIDsReceived_.reset();
00059     runNumbersSeen_.reset();
00060     lumiSectionsSeen_.reset();
00061     eolsSeen_.reset();
00062     
00063     unwantedEventsMap_.clear();
00064   }
00065   
00066   
00067   void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00068   {
00069     infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00070     infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00071     infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00072     infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00073   }
00074   
00075   
00076   void RunMonitorCollection::do_updateInfoSpaceItems()
00077   {
00078     MonitoredQuantity::Stats runNumberStats;
00079     runNumbersSeen_.getStats(runNumberStats);
00080     runNumber_ = static_cast<xdata::UnsignedInteger32>(
00081       static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00082     
00083     MonitoredQuantity::Stats eventIDsReceivedStats;
00084     eventIDsReceived_.getStats(eventIDsReceivedStats);
00085     dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00086       static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00087     
00088     MonitoredQuantity::Stats errorEventIDsReceivedStats;
00089     errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00090     errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00091       static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00092     
00093     MonitoredQuantity::Stats unwantedEventStats;
00094     unwantedEventIDsReceived_.getStats(unwantedEventStats);
00095     unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00096       static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00097   }
00098   
00099   
00100   void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00101   {
00102     if ( ioc.faulty() || !ioc.complete() ) return;
00103     
00104     unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00105     
00106     uint32_t outputModuleId = ioc.outputModuleId();
00107     
00108     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00109     
00110     UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00111     
00112     if(pos != unwantedEventsMap_.end() &&
00113       !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00114     {
00115       // key already exists
00116       ++(pos->second.count);
00117     }
00118     else
00119     {
00120       UnwantedEvent newEvent(ioc);
00121       unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00122     }
00123   }
00124   
00125   
00126   void RunMonitorCollection::checkForBadEvents()
00127   {
00128     alarmErrorEvents();
00129     
00130     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00131     std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00132       boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00133   }
00134   
00135   
00136   void RunMonitorCollection::alarmErrorEvents()
00137   {
00138     if ( ! alarmParams_.isProductionSystem_ ) return;
00139     
00140     const std::string alarmName("ErrorEvents");
00141     
00142     MonitoredQuantity::Stats stats;
00143     errorEventIDsReceived_.getStats(stats);
00144     long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00145     
00146     if ( count >= alarmParams_.errorEvents_ )
00147     {
00148       std::ostringstream msg;
00149       msg << "Received " << count << " error events in the last "
00150         << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00151       XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00152       sharedResources_->alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00153     }
00154     else
00155     {
00156       sharedResources_->alarmHandler_->revokeAlarm( alarmName );
00157     }
00158   }
00159   
00160   
00161   void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00162   {
00163     if ( ! alarmParams_.isProductionSystem_ ) return;
00164     
00165     if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00166     {
00167       std::ostringstream msg;
00168       msg << "Received " << val.second.count << " events"
00169         << " not tagged for any stream or consumer."
00170         << " Output module " << 
00171         sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00172         << " (id " << val.first << ")"
00173         << " HLT trigger bits: ";
00174       
00175       // This code snipped taken from evm:EventSelector::acceptEvent
00176       int byteIndex = 0;
00177       int subIndex  = 0;
00178       for (unsigned int pathIndex = 0;
00179            pathIndex < val.second.hltTriggerCount;
00180            ++pathIndex)
00181       {
00182         int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00183         state &= 0x3;
00184         msg << state << " ";
00185         ++subIndex;
00186         if (subIndex == 4)
00187         { ++byteIndex;
00188           subIndex = 0;
00189         }
00190       }
00191       
00192       XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00193       sharedResources_->alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00194       
00195       val.second.previousCount = val.second.count;
00196     }
00197     else if (val.second.count == val.second.previousCount)
00198       // no more unwanted events arrived
00199     {
00200       sharedResources_->alarmHandler_->revokeAlarm( val.second.alarmName );
00201     }
00202   }
00203   
00204   
00205   RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00206   : count(1), previousCount(0)
00207   {
00208     std::ostringstream str;
00209     str << "UnwantedEvent_" << nextId++;
00210     alarmName = str.str();
00211     hltTriggerCount = ioc.hltTriggerCount();
00212     ioc.hltTriggerBits(bitList);
00213   }
00214   
00215   uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00216   
00217 } // namespace stor
00218