test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
FastMonitoringService.cc
Go to the documentation of this file.
2 #include <iostream>
3 
5 #include <iomanip>
6 #include <sys/time.h>
7 
18 
21 using namespace jsoncollector;
22 
25 
/// Scale factor that turns a "size per microsecond" ratio into "size / 2^20 per second".
///
/// In this file it is applied as factor * accumulated size / elapsed microseconds
/// (presumably bytes, giving MiB/s — confirm the unit of the accumulated size).
///
/// @return 1000000 / (1024 * 1024), i.e. exactly 0.95367431640625.
constexpr double throughputFactor() noexcept {
  constexpr double usecPerSecond = 1000000.0;
  constexpr double unitsPerMebi = 1024.0 * 1024.0;
  return usecPerSecond / unitsPerMebi;
}
27 
// Slot counts used for the module and path legends: the number of reserved module
// entries (also the initial size handed to encModule_), the number of special module
// entries, and the number of reserved path entries.
static constexpr int nReservedModules{64};
static constexpr int nSpecialModules{10};
static constexpr int nReservedPaths{1};
31 
32 namespace evf{
33 
// Display names of the macro states, stored in an array sized by
// FastMonitoringThread::MCOUNT; eleven names are listed here.
// NOTE(review): presumably the position of each name must match the corresponding
// macrostate enumerator in FastMonitoringThread, which is not visible here —
// confirm before reordering or inserting entries.
34  const std::string FastMonitoringService::macroStateNames[FastMonitoringThread::MCOUNT] =
35  {"Init","JobReady","RunGiven","Running",
36  "Stopping","Done","JobEnded","Error","ErrorEnded","End",
37  "Invalid"};
38 
// Display names of the input states, stored in an array sized by
// FastMonitoringThread::inCOUNT. The input-legend JSON written by this file
// emits these strings in index order as its "names" array.
// The names fall into groups by prefix: plain input states, "Sup*" states,
// and "WaitInput_*" / "WaitChunk_*" states that repeat the same set of suffixes.
// NOTE(review): presumably each position must match the corresponding
// FastMonitoringThread input-state enumerator, which is not visible here —
// confirm before reordering or inserting entries.
39  const std::string FastMonitoringService::inputStateNames[FastMonitoringThread::inCOUNT] =
40  {"Ignore","Init","WaitInput","NewLumi","RunEnd","ProcessingFile","WaitChunk","ChunkReceived",
41  "ChecksumEvent","CachedEvent","ReadEvent","ReadCleanup","NoRequest","NoRequestWithIdleThreads",
42  "NoRequestWithIdleAndEoLThreads","NoRequestWithGlobalEoL","NoRequestWithAllEoLThreads","NoRequestWithEoLThreads",
43  "SupFileLimit", "SupWaitFreeChunk","SupWaitFreeChunkCopying", "SupWaitFreeThread","SupWaitFreeThreadCopying",
44  "SupBusy", "SupLockPolling","SupLockPollingCopying",
45  "SupNoFile", "SupNewFile", "SupNewFileWaitThreadCopying", "SupNewFileWaitThread",
46  "SupNewFileWaitChunkCopying", "SupNewFileWaitChunk",
47  "WaitInput_fileLimit","WaitInput_waitFreeChunk","WaitInput_waitFreeChunkCopying","WaitInput_waitFreeThread","WaitInput_waitFreeThreadCopying",
48  "WaitInput_busy","WaitInput_lockPolling","WaitInput_lockPollingCopying","WaitInput_runEnd",
49  "WaitInput_noFile","WaitInput_newFile","WaitInput_newFileWaitThreadCopying","WaitInput_newFileWaitThread",
50  "WaitInput_newFileWaitChunkCopying","WaitInput_newFileWaitChunk",
51  "WaitChunk_fileLimit","WaitChunk_waitFreeChunk","WaitChunk_waitFreeChunkCopying","WaitChunk_waitFreeThread","WaitChunk_waitFreeThreadCopying",
52  "WaitChunk_busy","WaitChunk_lockPolling","WaitChunk_lockPollingCopying","WaitChunk_runEnd",
53  "WaitChunk_noFile","WaitChunk_newFile","WaitChunk_newFileWaitThreadCopying","WaitChunk_newFileWaitThread",
54  "WaitChunk_newFileWaitChunkCopying","WaitChunk_newFileWaitChunk"
55  };
56 
57 
58  const std::string FastMonitoringService::nopath_ = "NoPath";
59 
60  FastMonitoringService::FastMonitoringService(const edm::ParameterSet& iPS,
61  edm::ActivityRegistry& reg) :
62  MicroStateService(iPS,reg)
63  ,encModule_(nReservedModules)
64  ,nStreams_(0)//until initialized
65  ,sleepTime_(iPS.getUntrackedParameter<int>("sleepTime", 1))
66  ,fastMonIntervals_(iPS.getUntrackedParameter<unsigned int>("fastMonIntervals", 2))
67  ,fastName_("fastmoni")
68  ,slowName_("slowmoni")
69  ,filePerFwkStream_(iPS.getUntrackedParameter<bool>("filePerFwkStream", false))
70  ,totalEventsProcessed_(0)
71  {
72  reg.watchPreallocate(this, &FastMonitoringService::preallocate);//receiving information on number of threads
74 
79 
83 
88 
90 
93 
94  reg.watchPreSourceEvent(this,&FastMonitoringService::preSourceEvent);//source (with streamID of requestor)
96 
99 
103 
104  //find microstate definition path (required by the module)
105  struct stat statbuf;
106  std::string microstateBaseSuffix = "src/EventFilter/Utilities/plugins/microstatedef.jsd";
107  std::string microstatePath = std::string(getenv("CMSSW_BASE")) + "/" + microstateBaseSuffix;
108  if (stat(microstatePath.c_str(), &statbuf)) {
109  microstatePath = std::string(getenv("CMSSW_RELEASE_BASE")) + "/" + microstateBaseSuffix;
110  if (stat(microstatePath.c_str(), &statbuf)) {
111  microstatePath = microstateBaseSuffix;
112  if (stat(microstatePath.c_str(), &statbuf))
113  throw cms::Exception("FastMonitoringService") << "microstate definition file not found";
114  }
115  }
116  fastMicrostateDefPath_ = microstateDefPath_ = microstatePath;
117  }
118 
119 
121  {
122  }
123 
125  {
127  desc.setComment("Service for File-based DAQ monitoring and event accounting");
128  desc.addUntracked<int> ("sleepTime",1)->setComment("Sleep time of the monitoring thread");
129  desc.addUntracked<unsigned int> ("fastMonIntervals",2)->setComment("Modulo of sleepTime intervals on which fastmon file is written out");
130  desc.addUntracked<bool> ("filePerFwkStream", false)->setComment("Switches on monitoring output per framework stream");
131  desc.setAllowAnything();
132  descriptions.add("FastMonitoringService", desc);
133  }
134 
135 
137  Json::Value legendaVector(Json::arrayValue);
138  for(int i = 0; i < encPath_[0].current_; i++)
139  legendaVector.append(Json::Value(*(static_cast<const std::string *>(encPath_[0].decode(i)))));
140  Json::Value valReserved(nReservedPaths);
141  Json::Value pathLegend;
142  pathLegend["names"]=legendaVector;
143  pathLegend["reserved"]=valReserved;
144  Json::StyledWriter writer;
145  return writer.write(pathLegend);
146  }
147 
149  Json::Value legendaVector(Json::arrayValue);
150  for(int i = 0; i < encModule_.current_; i++)
151  legendaVector.append(Json::Value((static_cast<const edm::ModuleDescription *>(encModule_.decode(i)))->moduleLabel()));
152  Json::Value valReserved(nReservedModules);
153  Json::Value valSpecial(nSpecialModules);
154  Json::Value valOutputModules(nOutputModules_);
155  Json::Value moduleLegend;
156  moduleLegend["names"]=legendaVector;
157  moduleLegend["reserved"]=valReserved;
158  moduleLegend["special"]=valSpecial;
159  moduleLegend["output"]=valOutputModules;
160  Json::StyledWriter writer;
161  return writer.write(moduleLegend);
162  }
163 
165  Json::Value legendaVector(Json::arrayValue);
166  for(int i = 0; i < FastMonitoringThread::inCOUNT; i++)
167  legendaVector.append(Json::Value(inputStateNames[i]));
168  Json::Value moduleLegend;
169  moduleLegend["names"]=legendaVector;
170  Json::StyledWriter writer;
171  return writer.write(moduleLegend);
172  }
173 
175  {
176  nStreams_=bounds.maxNumberOfStreams();
177  nThreads_=bounds.maxNumberOfThreads();
178  //this should already be >=1
179  if (nStreams_==0) nStreams_=1;
180  if (nThreads_==0) nThreads_=1;
181  }
182 
184  edm::ProcessContext const& pc)
185  {
186 
187  // FIND RUN DIRECTORY
188  // The run dir should be set via the configuration of EvFDaqDirector
189 
190  if (edm::Service<evf::EvFDaqDirector>().operator->()==nullptr)
191  {
192  throw cms::Exception("FastMonitoringService") << "EvFDaqDirector is not present";
193 
194  }
195  emptyLumisectionMode_ = edm::Service<evf::EvFDaqDirector>()->emptyLumisectionMode();
196  boost::filesystem::path runDirectory(edm::Service<evf::EvFDaqDirector>()->baseRunDir());
197  workingDirectory_ = runDirectory_ = runDirectory;
198  workingDirectory_ /= "mon";
199 
200  if ( !boost::filesystem::is_directory(workingDirectory_)) {
201  LogDebug("FastMonitoringService") << "<MON> DIR NOT FOUND! Trying to create -: " << workingDirectory_.string() ;
202  boost::filesystem::create_directories(workingDirectory_);
203  if ( !boost::filesystem::is_directory(workingDirectory_))
204  edm::LogWarning("FastMonitoringService") << "Unable to create <MON> DIR -: " << workingDirectory_.string()
205  << ". No monitoring data will be written.";
206  }
207 
208  std::ostringstream fastFileName;
209 
210  fastFileName << fastName_ << "_pid" << std::setfill('0') << std::setw(5) << getpid() << ".fast";
212  fast /= fastFileName.str();
213  fastPath_ = fast.string();
214  if (filePerFwkStream_)
215  for (unsigned int i=0;i<nStreams_;i++) {
216  std::ostringstream fastFileNameTid;
217  fastFileNameTid << fastName_ << "_pid" << std::setfill('0') << std::setw(5) << getpid() << "_tid" << i << ".fast";
219  fastTid /= fastFileNameTid.str();
220  fastPathList_.push_back(fastTid.string());
221  }
222 
223  std::ostringstream moduleLegFile;
224  std::ostringstream moduleLegFileJson;
225  moduleLegFile << "microstatelegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".leg";
226  moduleLegFileJson << "microstatelegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
227  moduleLegendFile_ = (workingDirectory_/moduleLegFile.str()).string();
228  moduleLegendFileJson_ = (workingDirectory_/moduleLegFileJson.str()).string();
229 
230  std::ostringstream pathLegFile;
231  std::ostringstream pathLegFileJson;
232  pathLegFile << "pathlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".leg";
233  pathLegendFile_ = (workingDirectory_/pathLegFile.str()).string();
234  pathLegFileJson << "pathlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
235  pathLegendFileJson_ = (workingDirectory_/pathLegFileJson.str()).string();
236 
237  std::ostringstream inputLegFileJson;
238  inputLegFileJson << "inputlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
239  inputLegendFileJson_ = (workingDirectory_/inputLegFileJson.str()).string();
240 
241  LogDebug("FastMonitoringService") << "Initializing FastMonitor with microstate def path -: "
243  //<< encPath_.current_ + 1 << " " << encModule_.current_ + 1
244 
245  /*
246  * initialize the fast monitor with:
247  * vector of pointers to monitorable parameters
248  * path to definition
249  *
250  */
251 
253 
254  for(unsigned int i = 0; i < (mCOUNT); i++)
255  encModule_.updateReserved(static_cast<const void*>(reservedMicroStateNames+i));
257 
258  for (unsigned int i=0;i<nStreams_;i++) {
259  ministate_.push_back(&nopath_);
261 
262  //for synchronization
263  streamCounterUpdating_.push_back(new std::atomic<bool>(0));
264 
265  //path (mini) state
266  encPath_.emplace_back(0);
267  encPath_[i].update(static_cast<const void*>(&nopath_));
268  eventCountForPathInit_.push_back(0);
269  firstEventId_.push_back(0);
270  collectedPathList_.push_back(new std::atomic<bool>(0));
271 
272  }
273  //for (unsigned int i=0;i<nThreads_;i++)
274  // threadMicrostate_.push_back(&reservedMicroStateNames[mInvalid]);
275 
276  //initial size until we detect number of bins
281 
282  lastGlobalLumi_=0;
284  lumiFromSource_=0;
285 
286  //startup monitoring
288  fmt_.jsonMonitor_->setNStreams(nStreams_);
290  monInit_.store(false,std::memory_order_release);
292 
293  //this definition needs: #include "tbb/compat/thread"
294  //however this would result in the TBB implementation replacing std::thread
295  //(both supposedly call pthread_self())
296  //number of threads created in process could be obtained from /proc,
297  //assuming that all posix threads are true kernel threads capable of running in parallel
298 
299  //#if TBB_IMPLEMENT_CPP0X
301  //threadIDAvailable_=true;
302  //#endif
303 
304  }
305 
307  {
308  std::string context;
309  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
310  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
311  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
312  edm::LogWarning("FastMonitoringService") << " STREAM " << sc.streamID().value() << " earlyTermination -: ID:"<< sc.eventID()
313  << " LS:" << sc.eventID().luminosityBlock() << " " << context;
314  std::lock_guard<std::mutex> lock(fmt_.monlock_);
315  exceptionInLS_.push_back(sc.eventID().luminosityBlock());
316  }
317 
319  {
320  std::string context;
321  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
322  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
323  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
324  edm::LogWarning("FastMonitoringService") << " GLOBAL " << "earlyTermination -: LS:"
325  << gc.luminosityBlockID().luminosityBlock() << " " << context;
326  std::lock_guard<std::mutex> lock(fmt_.monlock_);
328  }
329 
331  {
332  std::string context;
333  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
334  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
335  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
336  edm::LogWarning("FastMonitoringService") << " SOURCE " << "earlyTermination -: " << context;
337  std::lock_guard<std::mutex> lock(fmt_.monlock_);
338  exception_detected_=true;
339  }
340 
342  if (!ls) exception_detected_=true;
343  else exceptionInLS_.push_back(ls);
344  }
345 
347  {
349  }
350 
351  //new output module name is stream
353  {
354  std::lock_guard<std::mutex> lock(fmt_.monlock_);
355  //std::cout << " Pre module Begin Job module: " << desc.moduleName() << std::endl;
356 
357  //build a map of modules keyed by their module description address
358  //here we need to treat output modules in a special way so they can be easily singled out
359  if(desc.moduleName() == "Stream" || desc.moduleName() == "ShmStreamConsumer" || desc.moduleName() == "EvFOutputModule" ||
360  desc.moduleName() == "EventStreamFileWriter" || desc.moduleName() == "PoolOutputModule") {
361  encModule_.updateReserved((void*)&desc);
362  nOutputModules_++;
363  }
364  else
365  encModule_.update((void*)&desc);
366  }
367 
369  {
370  std::string && moduleLegStrJson = makeModuleLegendaJson();
371  FileIO::writeStringToFile(moduleLegendFileJson_, moduleLegStrJson);
372 
373  std::string inputLegendStrJson = makeInputLegendaJson();
374  FileIO::writeStringToFile(inputLegendFileJson_, inputLegendStrJson);
375 
377 
378  //update number of entries in module histogram
379  std::lock_guard<std::mutex> lock(fmt_.monlock_);
381  }
382 
384  {
386  fmt_.stop();
387  }
388 
390  {
392  }
393 
395  {
396  timeval lumiStartTime;
397  gettimeofday(&lumiStartTime, 0);
398  unsigned int newLumi = gc.luminosityBlockID().luminosityBlock();
399 
400  std::lock_guard<std::mutex> lock(fmt_.monlock_);
401 
402  lumiStartTime_[newLumi]=lumiStartTime;
403  while (!lastGlobalLumisClosed_.empty()) {
404  //wipe out old map entries as they aren't needed and slow down access
405  unsigned int oldLumi = lastGlobalLumisClosed_.back();
407  lumiStartTime_.erase(oldLumi);
408  avgLeadTime_.erase(oldLumi);
409  filesProcessedDuringLumi_.erase(oldLumi);
410  accuSize_.erase(oldLumi);
411  lockStatsDuringLumi_.erase(oldLumi);
412  processedEventsPerLumi_.erase(oldLumi);
413  }
414  lastGlobalLumi_= newLumi;
416  }
417 
419  {
420  unsigned int lumi = gc.luminosityBlockID().luminosityBlock();
421  LogDebug("FastMonitoringService") << "Lumi ended. Writing JSON information. LUMI -: "
422  << lumi;
423  timeval lumiStopTime;
424  gettimeofday(&lumiStopTime, 0);
425 
426  std::lock_guard<std::mutex> lock(fmt_.monlock_);
427 
428  // Compute throughput
429  timeval stt = lumiStartTime_[lumi];
430  unsigned long usecondsForLumi = (lumiStopTime.tv_sec - stt.tv_sec)*1000000
431  + (lumiStopTime.tv_usec - stt.tv_usec);
432  unsigned long accuSize = accuSize_.find(lumi)==accuSize_.end() ? 0 : accuSize_[lumi];
433  double throughput = throughputFactor()* double(accuSize) / double(usecondsForLumi);
434  //store to registered variable
435  fmt_.m_data.fastThroughputJ_.value() = throughput;
436 
437  //update
438  doSnapshot(lumi,true);
439 
440  //retrieve one result we need (todo: sanity check if it's found)
441  IntJ *lumiProcessedJptr = dynamic_cast<IntJ*>(fmt_.jsonMonitor_->getMergedIntJForLumi("Processed",lumi));
442  if (!lumiProcessedJptr)
443  throw cms::Exception("FastMonitoringService") << "Internal error: got null pointer from FastMonitor";
444  processedEventsPerLumi_[lumi] = std::pair<unsigned int,bool>(lumiProcessedJptr->value(),false);
445 
446  //checking if exception has been thrown (in case of Global/Stream early termination, for this LS)
447  bool exception_detected = exception_detected_;
448  for (auto ex : exceptionInLS_)
449  if (lumi == ex) exception_detected=true;
450 
451  if (edm::shutdown_flag || exception_detected) {
452  edm::LogInfo("FastMonitoringService") << "Run interrupted. Skip writing EoL information -: "
453  << processedEventsPerLumi_[lumi].first << " events were processed in LUMI " << lumi;
454  //this will prevent output modules from producing json file for possibly incomplete lumi
455  processedEventsPerLumi_[lumi].first=0;
456  processedEventsPerLumi_[lumi].second=true;
457  //disable this exception, so service can be used standalone (will be thrown if output module asks for this information)
458  //throw cms::Exception("FastMonitoringService") << "SOURCE did not send update for lumi block. LUMI -:" << lumi;
459  return;
460 
461  }
462 
463  if (inputSource_) {
464  auto sourceReport = inputSource_->getEventReport(lumi, true);
465  if (sourceReport.first) {
466  if (sourceReport.second!=processedEventsPerLumi_[lumi].first) {
467  throw cms::Exception("FastMonitoringService") << "MISMATCH with SOURCE update. LUMI -: "
468  << lumi
469  << ", events(processed):" << processedEventsPerLumi_[lumi].first
470  << " events(source):" << sourceReport.second;
471  }
472  }
473  }
474  edm::LogInfo("FastMonitoringService") << "Statistics for lumisection -: lumi = " << lumi << " events = "
475  << lumiProcessedJptr->value() << " time = " << usecondsForLumi/1000000
476  << " size = " << accuSize << " thr = " << throughput;
477  delete lumiProcessedJptr;
478 
479  //full global and stream merge&output for this lumi
480 
481  // create file name for slow monitoring file
482  if (filePerFwkStream_) {
483  std::stringstream slowFileNameStem;
484  slowFileNameStem << slowName_ << "_ls" << std::setfill('0') << std::setw(4)
485  << lumi << "_pid" << std::setfill('0')
486  << std::setw(5) << getpid();
488  slow /= slowFileNameStem.str();
489  fmt_.jsonMonitor_->outputFullJSONs(slow.string(),".jsn",lumi);
490  }
491  else {
492  std::stringstream slowFileName;
493  slowFileName << slowName_ << "_ls" << std::setfill('0') << std::setw(4)
494  << lumi << "_pid" << std::setfill('0')
495  << std::setw(5) << getpid() << ".jsn";
497  slow /= slowFileName.str();
498  fmt_.jsonMonitor_->outputFullJSON(slow.string(),lumi);//full global and stream merge and JSON write for this lumi
499  }
500  fmt_.jsonMonitor_->discardCollected(lumi);//we don't do further updates for this lumi
501 
503  }
504 
506  {
507  //mark closed lumis (still keep map entries until next one)
509  }
510 
512  {
513  unsigned int sid = sc.streamID().value();
514  std::lock_guard<std::mutex> lock(fmt_.monlock_);
516 
517  //reset collected values for this stream
518  *(fmt_.m_data.processed_[sid])=0;
519 
520  ministate_[sid]=&nopath_;
522  }
523 
525  {
527  }
528 
530  {
531  unsigned int sid = sc.streamID().value();
532  std::lock_guard<std::mutex> lock(fmt_.monlock_);
533 
534  #if ATOMIC_LEVEL>=2
535  //spinlock to make sure we are not still updating event counter somewhere
536  while (streamCounterUpdating_[sid]->load(std::memory_order_acquire)) {}
537  #endif
538 
539  //update processed count to be complete at this time
541  //reset this in case stream does not get notified of next lumi (we keep processed events only)
542  ministate_[sid]=&nopath_;
544  }
546  {
548  }
549 
550 
552  {
553  //make sure that all path names are retrieved before allowing ministate to change
554  //hack: assume memory is synchronized after ~50 events seen by each stream
555  if (unlikely(eventCountForPathInit_[sc.streamID()]<50) && false==collectedPathList_[sc.streamID()]->load(std::memory_order_acquire))
556  {
557  //protection between stream threads, as well as the service monitoring thread
558  std::lock_guard<std::mutex> lock(fmt_.monlock_);
559 
560  if (firstEventId_[sc.streamID()]==0)
561  firstEventId_[sc.streamID()]=sc.eventID().event();
562  if (sc.eventID().event()==firstEventId_[sc.streamID()])
563  {
564  encPath_[sc.streamID()].update((void*)&pc.pathName());
565  return;
566  }
567  else {
568  //finished collecting path names
569  collectedPathList_[sc.streamID()]->store(true,std::memory_order_seq_cst);
570  fmt_.m_data.ministateBins_=encPath_[sc.streamID()].vecsize();
571  if (!pathLegendWritten_) {
572  std::string pathLegendStrJson = makePathLegendaJson();
573  FileIO::writeStringToFile(pathLegendFileJson_, pathLegendStrJson);
574  pathLegendWritten_=true;
575  }
576  }
577  }
578  else {
579  ministate_[sc.streamID()] = &(pc.pathName());
580  }
581  }
582 
583 
585  {
586  }
587 
589  {
591 
592  ministate_[sc.streamID()] = &nopath_;
593 
594  #if ATOMIC_LEVEL>=2
595  //use atomic flag to make sure end of lumi sees this
596  streamCounterUpdating_[sc.streamID()]->store(true,std::memory_order_release);
597  fmt_.m_data.processed_[sc.streamID()]->fetch_add(1,std::memory_order_release);
598  streamCounterUpdating_[sc.streamID()]->store(false,std::memory_order_release);
599 
600  #elif ATOMIC_LEVEL==1
601  //writes are atomic, we assume writes propagate to memory before stream EOL snap
602  fmt_.m_data.processed_[sc.streamID()]->fetch_add(1,std::memory_order_relaxed);
603 
604  #elif ATOMIC_LEVEL==0 //default
605  (*(fmt_.m_data.processed_[sc.streamID()]))++;
606  #endif
608 
609  //fast path counter (events accumulated in a run)
610  unsigned long res = totalEventsProcessed_.fetch_add(1,std::memory_order_relaxed);
612  //fmt_.m_data.fastPathProcessedJ_ = totalEventsProcessed_.load(std::memory_order_relaxed);
613  }
614 
616  {
618  }
619 
621  {
623  }
624 
626  {
627  microstate_[sc.streamID().value()] = (void*)(mcc.moduleDescription());
628  }
629 
631  {
632  //microstate_[sc.streamID().value()] = (void*)(mcc.moduleDescription());
634  }
635 
636  //FUNCTIONS CALLED FROM OUTSIDE
637 
638  //this is for old-fashioned service that is not thread safe and can block other streams
639  //(we assume the worst case - everything is blocked)
641  {
642  for (unsigned int i=0;i<nStreams_;i++)
644  }
645 
646  //this is for services that are multithreading-enabled or rarely block other streams
648  {
650  }
651 
652  //from source
// Adds fileSize to the running total kept for lumisection `lumi` in accuSize_,
// creating the entry on first use, and updates filesProcessedDuringLumi_[lumi].
// Both maps are modified while holding fmt_.monlock_.
// NOTE(review): the listing's embedded numbering skips 659 and 662 here, so the
// condition guarding the "= 1" assignment and the statement of the final `else`
// are not visible; presumably the per-lumi file counter is started at 1 or
// incremented — confirm against the repository.
653  void FastMonitoringService::accumulateFileSize(unsigned int lumi, unsigned long fileSize) {
654  std::lock_guard<std::mutex> lock(fmt_.monlock_);
655 
// first file seen for this lumi starts the total, later ones add to it
656  if (accuSize_.find(lumi)==accuSize_.end()) accuSize_[lumi] = fileSize;
657  else accuSize_[lumi] += fileSize;
658 
660  filesProcessedDuringLumi_[lumi] = 1;
661  else
663  }
664 
666  gettimeofday(&fileLookStart_, 0);
667  /*
668  std::cout << "Started looking for .raw file at: s=" << fileLookStart_.tv_sec << ": ms = "
669  << fileLookStart_.tv_usec / 1000.0 << std::endl;
670  */
671  }
672 
674  gettimeofday(&fileLookStop_, 0);
675  /*
676  std::cout << "Stopped looking for .raw file at: s=" << fileLookStop_.tv_sec << ": ms = "
677  << fileLookStop_.tv_usec / 1000.0 << std::endl;
678  */
679  std::lock_guard<std::mutex> lock(fmt_.monlock_);
680 
681  if (lumi>lumiFromSource_) {
683  leadTimes_.clear();
684  }
685  unsigned long elapsedTime = (fileLookStop_.tv_sec - fileLookStart_.tv_sec) * 1000000 // sec to us
686  + (fileLookStop_.tv_usec - fileLookStart_.tv_usec); // us
687  // add this to lead times for this lumi
688  leadTimes_.push_back((double)elapsedTime);
689 
690  // recompute average lead time for this lumi
691  if (leadTimes_.size() == 1) avgLeadTime_[lumi] = leadTimes_[0];
692  else {
693  double totTime = 0;
694  for (unsigned int i = 0; i < leadTimes_.size(); i++) totTime += leadTimes_[i];
695  avgLeadTime_[lumi] = 0.001*(totTime / leadTimes_.size());
696  }
697  }
698 
699  void FastMonitoringService::reportLockWait(unsigned int ls, double waitTime, unsigned int lockCount)
700  {
701  std::lock_guard<std::mutex> lock(fmt_.monlock_);
702  lockStatsDuringLumi_[ls]=std::pair<double,unsigned int>(waitTime,lockCount);
703 
704  }
705 
706  //for the output module
707  unsigned int FastMonitoringService::getEventsProcessedForLumi(unsigned int lumi, bool * abortFlag) {
708  std::lock_guard<std::mutex> lock(fmt_.monlock_);
709 
710  auto it = processedEventsPerLumi_.find(lumi);
711  if (it!=processedEventsPerLumi_.end()) {
712  unsigned int proc = it->second.first;
713  if (abortFlag) *abortFlag=it->second.second;
714  return proc;
715  }
716  else {
717  throw cms::Exception("FastMonitoringService") << "output module wants already deleted (or never reported by SOURCE) lumisection event count for LUMI -: "<<lumi;
718  return 0;
719  }
720  }
721 
722  //for the output module
724  std::lock_guard<std::mutex> lock(fmt_.monlock_);
725 
726  auto it = processedEventsPerLumi_.find(lumi);
727  if (it!=processedEventsPerLumi_.end()) {
728  unsigned int abortFlag = it->second.second;
729  return abortFlag;
730  }
731  else {
732  throw cms::Exception("FastMonitoringService") << "output module wants already deleted (or never reported by SOURCE) lumisection status for LUMI -: "<<lumi;
733  return 0;
734  }
735  }
736 
// Refreshes the monitored values for lumisection `ls` and triggers a snapshot.
//
// Visible behaviour:
//  - copies the per-lumi average lead time (0. when absent), files-processed count
//    and lock wait/count statistics for `ls` into fmt_.m_data;
//  - scans microstate_ for all nStreams_ streams to find out whether any stream is
//    idle, whether any is at end-of-lumi, and whether all are at end-of-lumi;
//  - finally calls jsonMonitor_->snapGlobal(ls) when isGlobalEOL is true,
//    otherwise jsonMonitor_->snap(ls).
//
// NOTE(review): the listing's embedded numbering has many gaps inside this function
// (e.g. 739, 742, 752, 760-761, 766, 775-776 and every case label of both switch
// statements), so the conditions that open several of the visible `else` / `}`
// lines and the statements selected by the switches are not shown — confirm
// against the repository before changing any control flow here.
737  void FastMonitoringService::doSnapshot(const unsigned int ls, const bool isGlobalEOL) {
738  // update macrostate
740 
741  //update these unless in the midst of a global transition
743 
744  auto itd = avgLeadTime_.find(ls);
745  if (itd != avgLeadTime_.end())
746  fmt_.m_data.fastAvgLeadTimeJ_ = itd->second;
747  else fmt_.m_data.fastAvgLeadTimeJ_=0.;
748 
749  auto iti = filesProcessedDuringLumi_.find(ls);
750  if (iti != filesProcessedDuringLumi_.end())
751  fmt_.m_data.fastFilesProcessedJ_ = iti->second;
753 
754  auto itrd = lockStatsDuringLumi_.find(ls);
755  if (itrd != lockStatsDuringLumi_.end()) {
756  fmt_.m_data.fastLockWaitJ_ = itrd->second.first;
757  fmt_.m_data.fastLockCountJ_ = itrd->second.second;
758  }
759  else {
762  }
763  }
764  else {
765  for (unsigned int i=0;i<nStreams_;i++) {
767  }
768  }
769  //else return;
770  //capture latest mini/microstate of streams
771  bool anyThreadsIdle=false;
772  bool anyThreadsEoL=false;
773  bool allThreadsEoL=true;
774  for (unsigned int i=0;i<nStreams_;i++) {
// a stream counts as idle / end-of-lumi when its microstate points at the
// corresponding reserved entry; any stream not at end-of-lumi clears allThreadsEoL
777  if (microstate_[i]==&reservedMicroStateNames[mIdle]) anyThreadsIdle=true;
778  if (microstate_[i]==&reservedMicroStateNames[mEoL]) anyThreadsEoL=true;
779  else allThreadsEoL=false;
780  }
781 
783  switch (inputSupervisorState_) {
786  break;
789  break;
792  break;
795  break;
798  break;
801  break;
804  break;
807  break;
810  break;
813  break;
816  break;
819  break;
822  break;
825  break;
828  break;
829  default:
831  }
832  }
834 
835  switch (inputSupervisorState_) {
838  break;
841  break;
844  break;
847  break;
850  break;
853  break;
856  break;
859  break;
862  break;
865  break;
868  break;
871  break;
874  break;
877  break;
880  break;
881  default:
883  }
884  }
888  else if (anyThreadsEoL && anyThreadsIdle)
890  else if (anyThreadsIdle)
892  else if (anyThreadsEoL)
894  else if (allThreadsEoL)
896  else
898  }
899  else
901 
902  //for (unsigned int i=0;i<nThreads_;i++)
903  // fmt_.m_data.threadMicrostateEncoded_[i] = encModule_.encode(threadMicrostate_[i]);
904 
905  if (isGlobalEOL)
906  {//only update global variables
907  fmt_.jsonMonitor_->snapGlobal(ls);
908  }
909  else
910  fmt_.jsonMonitor_->snap(ls);
911  }
912 
913 } //end namespace evf
914 
#define LogDebug(id)
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
FastMonitoringThread::InputState inputState_
std::string const & pathName() const
Definition: PathContext.h:37
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
EventNumber_t event() const
Definition: EventID.h:41
void watchPreStreamEarlyTermination(PreStreamEarlyTermination::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
int i
Definition: DBlmapReader.cc:9
unsigned int getEventsProcessedForLumi(unsigned int lumi, bool *abortFlag=0)
static const edm::ModuleDescription reservedMicroStateNames[mCOUNT]
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
void setExceptionDetected(unsigned int ls)
boost::filesystem::path runDirectory_
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preallocate(edm::service::SystemBounds const &)
std::map< unsigned int, timeval > lumiStartTime_
void start(void(FastMonitoringService::*fp)(), FastMonitoringService *cp)
void preGlobalBeginLumi(edm::GlobalContext const &)
std::pair< bool, unsigned int > getEventReport(unsigned int lumi, bool erase)
void setAllowAnything()
allow any parameter label/value pairs
double throughputFactor()
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postGlobalEndLumi(edm::GlobalContext const &)
TrainProcessor *const proc
Definition: MVATrainer.cc:101
void postEvent(edm::StreamContext const &)
void accumulateFileSize(unsigned int lumi, unsigned long fileSize)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
std::map< unsigned int, unsigned long > accuSize_
tuple lumi
Definition: fjr2json.py:35
std::vector< unsigned int > eventCountForPathInit_
std::vector< std::atomic< bool > * > streamCounterUpdating_
void watchPostEvent(PostEvent::slot_type const &iSlot)
static const int nReservedPaths
LuminosityBlockID const & luminosityBlockID() const
Definition: GlobalContext.h:52
def ls
Definition: eostools.py:348
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
std::string const & moduleName() const
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
Value & append(const Value &value)
Append value to array at the end.
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
volatile std::atomic< bool > shutdown_flag
void doStreamEOLSnapshot(const unsigned int ls, const unsigned int streamID)
std::map< unsigned int, unsigned int > filesProcessedDuringLumi_
void preGlobalEndLumi(edm::GlobalContext const &)
Represents a JSON value.
Definition: value.h:111
FastMonitoringThread::InputState inputSupervisorState_
LuminosityBlockNumber_t luminosityBlock() const
Definition: EventID.h:40
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
void preGlobalEarlyTermination(edm::GlobalContext const &, edm::TerminationOrigin)
#define constexpr
void watchPreSourceEarlyTermination(PreSourceEarlyTermination::slot_type const &iSlot)
void watchJobFailure(JobFailure::slot_type const &iSlot)
convenience function for attaching to signal
void registerVariables(jsoncollector::FastMonitor *fm, unsigned int nStreams, unsigned int nThreads)
#define unlikely(x)
unsigned int maxNumberOfStreams() const
Definition: SystemBounds.h:43
void preModuleBeginJob(edm::ModuleDescription const &)
static const std::string inputStateNames[FastMonitoringThread::inCOUNT]
void setComment(std::string const &value)
std::vector< std::atomic< bool > * > collectedPathList_
std::queue< unsigned int > lastGlobalLumisClosed_
void preStreamEndLumi(edm::StreamContext const &)
std::map< unsigned int, double > avgLeadTime_
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void doSnapshot(const unsigned int ls, const bool isGlobalEOL)
void preStreamEarlyTermination(edm::StreamContext const &, edm::TerminationOrigin)
void watchPreGlobalEarlyTermination(PreGlobalEarlyTermination::slot_type const &iSlot)
std::unique_ptr< jsoncollector::FastMonitor > jsonMonitor_
static const std::string nopath_
def load
Definition: svgfig.py:546
ModuleDescription const * moduleDescription() const
void resetFastMonitor(std::string const &microStateDefPath, std::string const &fastMicroStateDefPath)
static const int nReservedModules
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
std::map< unsigned int, std::pair< double, unsigned int > > lockStatsDuringLumi_
std::vector< unsigned long > firstEventId_
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
virtual std::string write(const Value &root)
Serialize a Value in JSON format.
std::vector< std::string > fastPathList_
void postStreamBeginLumi(edm::StreamContext const &)
StreamID const & streamID() const
Definition: StreamContext.h:57
void postStreamEndLumi(edm::StreamContext const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< const void * > microstate_
std::vector< unsigned int > microstateEncoded_
unsigned int value() const
Definition: StreamID.h:46
std::map< unsigned int, std::pair< unsigned int, bool > > processedEventsPerLumi_
void preStreamBeginLumi(edm::StreamContext const &)
std::atomic< unsigned long > totalEventsProcessed_
FedRawDataInputSource * inputSource_
static const int nSpecialModules
LuminosityBlockNumber_t luminosityBlock() const
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
FastMonitoringThread::Macrostate macrostate_
std::vector< double > leadTimes_
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &pc)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void stoppedLookingForFile(unsigned int lumi)
void setMicroState(MicroStateService::Microstate)
boost::filesystem::path workingDirectory_
std::vector< jsoncollector::AtomicMonUInt * > processed_
void postGlobalBeginRun(edm::GlobalContext const &)
void preEvent(edm::StreamContext const &)
void preSourceEarlyTermination(edm::TerminationOrigin)
EventID const & eventID() const
Definition: StreamContext.h:59
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
bool getAbortFlagForLumi(unsigned int lumi)
volatile std::atomic< bool > shutdown_flag false
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
Writes a Value in JSON format in a human friendly way.
Definition: writer.h:65
std::vector< const void * > ministate_
const void * decode(unsigned int index)
std::vector< unsigned int > streamLumi_
std::vector< Encoding > encPath_
std::vector< unsigned int > inputState_
std::vector< unsigned int > ministateEncoded_
void reportLockWait(unsigned int ls, double waitTime, unsigned int lockCount)
std::vector< unsigned int > exceptionInLS_
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
array value (ordered list)
Definition: value.h:31