CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch13/src/EventFilter/StorageManager/src/RunMonitorCollection.cc

Go to the documentation of this file.
00001 // $Id: RunMonitorCollection.cc,v 1.16 2011/04/19 11:10:34 mommsen Exp $
00003 
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008 
00009 #include <boost/bind.hpp>
00010 
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015 
00016 
00017 namespace stor {
00018   
00019   RunMonitorCollection::RunMonitorCollection
00020   (
00021     const utils::Duration_t& updateInterval,
00022     AlarmHandlerPtr ah,
00023     SharedResourcesPtr sr
00024   ) :
00025   MonitorCollection(updateInterval),
00026   eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027   errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028   unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00029   runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00030   lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031   eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00032   alarmHandler_(ah),
00033   sharedResources_(sr)
00034   {}
00035   
00036   
00037   void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00038   {
00039     alarmParams_ = alarmParams;
00040   }
00041   
00042   
00043   void RunMonitorCollection::do_calculateStatistics()
00044   {
00045     eventIDsReceived_.calculateStatistics();
00046     errorEventIDsReceived_.calculateStatistics();
00047     unwantedEventIDsReceived_.calculateStatistics();
00048     runNumbersSeen_.calculateStatistics();
00049     lumiSectionsSeen_.calculateStatistics();
00050     eolsSeen_.calculateStatistics();
00051     
00052     checkForBadEvents();
00053   }
00054   
00055   
00056   void RunMonitorCollection::do_reset()
00057   {
00058     eventIDsReceived_.reset();
00059     errorEventIDsReceived_.reset();
00060     unwantedEventIDsReceived_.reset();
00061     runNumbersSeen_.reset();
00062     lumiSectionsSeen_.reset();
00063     eolsSeen_.reset();
00064     
00065     unwantedEventsMap_.clear();
00066   }
00067   
00068   
00069   void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00070   {
00071     infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00072     infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00073     infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00074     infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00075   }
00076   
00077   
00078   void RunMonitorCollection::do_updateInfoSpaceItems()
00079   {
00080     MonitoredQuantity::Stats runNumberStats;
00081     runNumbersSeen_.getStats(runNumberStats);
00082     runNumber_ = static_cast<xdata::UnsignedInteger32>(
00083       static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00084     
00085     MonitoredQuantity::Stats eventIDsReceivedStats;
00086     eventIDsReceived_.getStats(eventIDsReceivedStats);
00087     dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00088       static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00089     
00090     MonitoredQuantity::Stats errorEventIDsReceivedStats;
00091     errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00092     errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00093       static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00094     
00095     MonitoredQuantity::Stats unwantedEventStats;
00096     unwantedEventIDsReceived_.getStats(unwantedEventStats);
00097     unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00098       static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00099   }
00100   
00101   
00102   void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00103   {
00104     if ( ioc.faulty() || !ioc.complete() ) return;
00105     
00106     unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00107     
00108     uint32_t outputModuleId = ioc.outputModuleId();
00109     
00110     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00111     
00112     UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00113     
00114     if(pos != unwantedEventsMap_.end() &&
00115       !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00116     {
00117       // key already exists
00118       ++(pos->second.count);
00119     }
00120     else
00121     {
00122       UnwantedEvent newEvent(ioc);
00123       unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00124     }
00125   }
00126   
00127   
00128   void RunMonitorCollection::checkForBadEvents()
00129   {
00130     alarmErrorEvents();
00131     
00132     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00133     std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00134       boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00135   }
00136   
00137   
00138   void RunMonitorCollection::alarmErrorEvents()
00139   {
00140     if ( ! alarmParams_.isProductionSystem_ ) return;
00141     
00142     const std::string alarmName("ErrorEvents");
00143     
00144     MonitoredQuantity::Stats stats;
00145     errorEventIDsReceived_.getStats(stats);
00146     long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00147     
00148     if ( count >= alarmParams_.errorEvents_ )
00149     {
00150       std::ostringstream msg;
00151       msg << "Received " << count << " error events in the last "
00152         << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00153       XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00154       alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00155     }
00156     else
00157     {
00158       alarmHandler_->revokeAlarm( alarmName );
00159     }
00160   }
00161   
00162   
00163   void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00164   {
00165     if ( ! alarmParams_.isProductionSystem_ ) return;
00166     
00167     if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00168     {
00169       std::ostringstream msg;
00170       msg << "Received " << val.second.count << " events"
00171         << " not tagged for any stream or consumer."
00172         << " Output module " << 
00173         sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00174         << " (id " << val.first << ")"
00175         << " HLT trigger bits: ";
00176       
00177       // This code snipped taken from evm:EventSelector::acceptEvent
00178       int byteIndex = 0;
00179       int subIndex  = 0;
00180       for (unsigned int pathIndex = 0;
00181            pathIndex < val.second.hltTriggerCount;
00182            ++pathIndex)
00183       {
00184         int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00185         state &= 0x3;
00186         msg << state << " ";
00187         ++subIndex;
00188         if (subIndex == 4)
00189         { ++byteIndex;
00190           subIndex = 0;
00191         }
00192       }
00193       
00194       XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00195       alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00196       
00197       val.second.previousCount = val.second.count;
00198     }
00199     else if (val.second.count == val.second.previousCount)
00200       // no more unwanted events arrived
00201     {
00202       alarmHandler_->revokeAlarm( val.second.alarmName );
00203     }
00204   }
00205   
00206   
00207   RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00208   : count(1), previousCount(0)
00209   {
00210     std::ostringstream str;
00211     str << "UnwantedEvent_" << nextId++;
00212     alarmName = str.str();
00213     hltTriggerCount = ioc.hltTriggerCount();
00214     ioc.hltTriggerBits(bitList);
00215   }
00216   
00217   uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00218   
00219 } // namespace stor
00220