CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_2_9/src/EventFilter/StorageManager/src/RunMonitorCollection.cc

Go to the documentation of this file.
00001 // $Id: RunMonitorCollection.cc,v 1.18 2012/06/08 10:20:33 mommsen Exp $
00003 
00004 #include <string>
00005 #include <sstream>
00006 #include <iomanip>
00007 #include <algorithm>
00008 
00009 #include <boost/bind.hpp>
00010 
00011 #include "EventFilter/StorageManager/interface/AlarmHandler.h"
00012 #include "EventFilter/StorageManager/interface/Exception.h"
00013 #include "EventFilter/StorageManager/interface/InitMsgCollection.h"
00014 #include "EventFilter/StorageManager/interface/RunMonitorCollection.h"
00015 
00016 
00017 namespace stor {
00018   
00019   RunMonitorCollection::RunMonitorCollection
00020   (
00021     const utils::Duration_t& updateInterval,
00022     SharedResourcesPtr sr
00023   ) :
00024   MonitorCollection(updateInterval),
00025   eventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00026   errorEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00027   unwantedEventIDsReceived_(updateInterval, boost::posix_time::seconds(1)),
00028   runNumbersSeen_(updateInterval, boost::posix_time::seconds(1)),
00029   lumiSectionsSeen_(updateInterval, boost::posix_time::seconds(1)),
00030   eolsSeen_(updateInterval, boost::posix_time::seconds(1)),
00031   sharedResources_(sr)
00032   {}
00033   
00034   
00035   void RunMonitorCollection::configureAlarms(AlarmParams const& alarmParams)
00036   {
00037     alarmParams_ = alarmParams;
00038   }
00039   
00040   
00041   void RunMonitorCollection::do_calculateStatistics()
00042   {
00043     eventIDsReceived_.calculateStatistics();
00044     errorEventIDsReceived_.calculateStatistics();
00045     unwantedEventIDsReceived_.calculateStatistics();
00046     runNumbersSeen_.calculateStatistics();
00047     lumiSectionsSeen_.calculateStatistics();
00048     eolsSeen_.calculateStatistics();
00049     
00050     checkForBadEvents();
00051   }
00052   
00053   
00054   void RunMonitorCollection::do_reset()
00055   {
00056     eventIDsReceived_.reset();
00057     errorEventIDsReceived_.reset();
00058     unwantedEventIDsReceived_.reset();
00059     runNumbersSeen_.reset();
00060     lumiSectionsSeen_.reset();
00061     eolsSeen_.reset();
00062     
00063     unwantedEventsMap_.clear();
00064   }
00065   
00066   
00067   void RunMonitorCollection::do_appendInfoSpaceItems(InfoSpaceItems& infoSpaceItems)
00068   {
00069     infoSpaceItems.push_back(std::make_pair("runNumber", &runNumber_));
00070     infoSpaceItems.push_back(std::make_pair("dataEvents", &dataEvents_));
00071     infoSpaceItems.push_back(std::make_pair("errorEvents", &errorEvents_));
00072     infoSpaceItems.push_back(std::make_pair("unwantedEvents", &unwantedEvents_));
00073   }
00074   
00075   
00076   void RunMonitorCollection::do_updateInfoSpaceItems()
00077   {
00078     MonitoredQuantity::Stats runNumberStats;
00079     runNumbersSeen_.getStats(runNumberStats);
00080     runNumber_ = static_cast<xdata::UnsignedInteger32>(
00081       static_cast<unsigned int>(runNumberStats.getLastSampleValue()));
00082     
00083     MonitoredQuantity::Stats eventIDsReceivedStats;
00084     eventIDsReceived_.getStats(eventIDsReceivedStats);
00085     dataEvents_ = static_cast<xdata::UnsignedInteger32>(
00086       static_cast<unsigned int>(eventIDsReceivedStats.getSampleCount()));
00087     
00088     MonitoredQuantity::Stats errorEventIDsReceivedStats;
00089     errorEventIDsReceived_.getStats(errorEventIDsReceivedStats);
00090     errorEvents_ = static_cast<xdata::UnsignedInteger32>(
00091       static_cast<unsigned int>(errorEventIDsReceivedStats.getSampleCount()));
00092     
00093     MonitoredQuantity::Stats unwantedEventStats;
00094     unwantedEventIDsReceived_.getStats(unwantedEventStats);
00095     unwantedEvents_ = static_cast<xdata::UnsignedInteger32>(
00096       static_cast<unsigned int>(unwantedEventStats.getSampleCount()));
00097   }
00098   
00099   
00100   void RunMonitorCollection::addUnwantedEvent(const I2OChain& ioc)
00101   {
00102     if ( ! alarmParams_.careAboutUnwantedEvents_ ) return;
00103     if ( ioc.faulty() || !ioc.complete() ) return;
00104     
00105     unwantedEventIDsReceived_.addSample(ioc.eventNumber());
00106     
00107     uint32_t outputModuleId = ioc.outputModuleId();
00108     
00109     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00110     
00111     UnwantedEventsMap::iterator pos = unwantedEventsMap_.lower_bound(outputModuleId);
00112     
00113     if(pos != unwantedEventsMap_.end() &&
00114       !(unwantedEventsMap_.key_comp()(outputModuleId, pos->first)))
00115     {
00116       // key already exists
00117       ++(pos->second.count);
00118     }
00119     else
00120     {
00121       UnwantedEvent newEvent(ioc);
00122       unwantedEventsMap_.insert(pos, UnwantedEventsMap::value_type(outputModuleId, newEvent));
00123     }
00124   }
00125   
00126   
00127   void RunMonitorCollection::checkForBadEvents()
00128   {
00129     alarmErrorEvents();
00130     
00131     boost::mutex::scoped_lock sl(unwantedEventMapLock_);
00132     std::for_each(unwantedEventsMap_.begin(), unwantedEventsMap_.end(),
00133       boost::bind(&RunMonitorCollection::alarmUnwantedEvents, this, _1));
00134   }
00135   
00136   
00137   void RunMonitorCollection::alarmErrorEvents()
00138   {
00139     if ( ! alarmParams_.isProductionSystem_ ) return;
00140     
00141     const std::string alarmName("ErrorEvents");
00142     
00143     MonitoredQuantity::Stats stats;
00144     errorEventIDsReceived_.getStats(stats);
00145     long long count = stats.getSampleCount(MonitoredQuantity::RECENT);
00146     
00147     if ( count >= alarmParams_.errorEvents_ )
00148     {
00149       std::ostringstream msg;
00150       msg << "Received " << count << " error events in the last "
00151         << stats.getDuration(MonitoredQuantity::RECENT).total_seconds() << "s.";
00152       XCEPT_DECLARE( stor::exception::ErrorEvents, xcept, msg.str() );
00153       sharedResources_->alarmHandler_->raiseAlarm( alarmName, AlarmHandler::ERROR, xcept );
00154     }
00155     else
00156     {
00157       sharedResources_->alarmHandler_->revokeAlarm( alarmName );
00158     }
00159   }
00160   
00161   
00162   void RunMonitorCollection::alarmUnwantedEvents(UnwantedEventsMap::value_type& val)
00163   {
00164     if ( ! alarmParams_.isProductionSystem_ ) return;
00165     
00166     if ( (val.second.count - val.second.previousCount) > alarmParams_.unwantedEvents_ )
00167     {
00168       std::ostringstream msg;
00169       msg << "Received " << val.second.count << " events"
00170         << " not tagged for any stream or consumer."
00171         << " Output module " << 
00172         sharedResources_->initMsgCollection_->getOutputModuleName(val.first)
00173         << " (id " << val.first << ")"
00174         << " HLT trigger bits: ";
00175       
00176       // This code snipped taken from evm:EventSelector::acceptEvent
00177       int byteIndex = 0;
00178       int subIndex  = 0;
00179       for (unsigned int pathIndex = 0;
00180            pathIndex < val.second.hltTriggerCount;
00181            ++pathIndex)
00182       {
00183         int state = val.second.bitList[byteIndex] >> (subIndex * 2);
00184         state &= 0x3;
00185         msg << state << " ";
00186         ++subIndex;
00187         if (subIndex == 4)
00188         { ++byteIndex;
00189           subIndex = 0;
00190         }
00191       }
00192       
00193       XCEPT_DECLARE( stor::exception::UnwantedEvents, xcept, msg.str() );
00194       sharedResources_->alarmHandler_->raiseAlarm( val.second.alarmName, AlarmHandler::ERROR, xcept );
00195       
00196       val.second.previousCount = val.second.count;
00197     }
00198     else if (val.second.count == val.second.previousCount)
00199       // no more unwanted events arrived
00200     {
00201       sharedResources_->alarmHandler_->revokeAlarm( val.second.alarmName );
00202     }
00203   }
00204   
00205   
00206   RunMonitorCollection::UnwantedEvent::UnwantedEvent(const I2OChain& ioc)
00207   : count(1), previousCount(0)
00208   {
00209     std::ostringstream str;
00210     str << "UnwantedEvent_" << nextId++;
00211     alarmName = str.str();
00212     hltTriggerCount = ioc.hltTriggerCount();
00213     ioc.hltTriggerBits(bitList);
00214   }
00215   
00216   uint32_t RunMonitorCollection::UnwantedEvent::nextId(0);
00217   
00218 } // namespace stor
00219