CMS 3D CMS Logo

FastMonitoringService.cc
Go to the documentation of this file.
2 #include <iostream>
3 
5 #include <iomanip>
6 #include <sys/time.h>
7 
18 
21 using namespace jsoncollector;
22 
25 
// Scale factor applied to (accumulated size / elapsed microseconds) when the
// per-lumisection throughput is computed: 1e6 microseconds per second divided
// by 1024*1024. Presumably the size is in bytes, giving MiB/s -- confirm
// against the callers of accumulateFileSize().
constexpr double throughputFactor() {
  return 1.0e6 / static_cast<double>(1024 * 1024);
}
27 
// Initial argument for the module encoding (encModule_) and the value
// published as "reserved" in the module legend JSON.
28 static const int nReservedModules = 64;
// Value published as "special" in the module legend JSON.
29 static const int nSpecialModules = 10;
// Value published as "reserved" in the path legend JSON.
30 static const int nReservedPaths = 1;
31 
32 namespace evf{
33 
// Display names for the macrostates; the array is sized by
// FastMonitoringThread::MCOUNT.
// NOTE(review): presumably indexed by the Macrostate enum value, so the order
// here must match the enum declaration -- confirm in FastMonitoringThread.
34  const std::string FastMonitoringService::macroStateNames[FastMonitoringThread::MCOUNT] =
35  {"Init","JobReady","RunGiven","Running",
36  "Stopping","Done","JobEnded","Error","ErrorEnded","End",
37  "Invalid"};
38 
// Display names for the input states; the array is sized by
// FastMonitoringThread::inCOUNT and is written out entry by entry, in index
// order, as the "names" list of the input legend JSON.
// NOTE(review): presumably the order must match the InputState enum
// declaration -- confirm in FastMonitoringThread before adding entries.
39  const std::string FastMonitoringService::inputStateNames[FastMonitoringThread::inCOUNT] =
40  {"Ignore","Init","WaitInput","NewLumi","NewLumiBusyEndingLS","NewLumiIdleEndingLS","RunEnd","ProcessingFile","WaitChunk","ChunkReceived",
41  "ChecksumEvent","CachedEvent","ReadEvent","ReadCleanup","NoRequest","NoRequestWithIdleThreads",
42  "NoRequestWithGlobalEoL","NoRequestWithEoLThreads",
43  "SupFileLimit", "SupWaitFreeChunk","SupWaitFreeChunkCopying", "SupWaitFreeThread","SupWaitFreeThreadCopying",
44  "SupBusy", "SupLockPolling","SupLockPollingCopying",
45  "SupNoFile", "SupNewFile", "SupNewFileWaitThreadCopying", "SupNewFileWaitThread",
46  "SupNewFileWaitChunkCopying", "SupNewFileWaitChunk",
47  "WaitInput_fileLimit","WaitInput_waitFreeChunk","WaitInput_waitFreeChunkCopying","WaitInput_waitFreeThread","WaitInput_waitFreeThreadCopying",
48  "WaitInput_busy","WaitInput_lockPolling","WaitInput_lockPollingCopying","WaitInput_runEnd",
49  "WaitInput_noFile","WaitInput_newFile","WaitInput_newFileWaitThreadCopying","WaitInput_newFileWaitThread",
50  "WaitInput_newFileWaitChunkCopying","WaitInput_newFileWaitChunk",
51  "WaitChunk_fileLimit","WaitChunk_waitFreeChunk","WaitChunk_waitFreeChunkCopying","WaitChunk_waitFreeThread","WaitChunk_waitFreeThreadCopying",
52  "WaitChunk_busy","WaitChunk_lockPolling","WaitChunk_lockPollingCopying","WaitChunk_runEnd",
53  "WaitChunk_noFile","WaitChunk_newFile","WaitChunk_newFileWaitThreadCopying","WaitChunk_newFileWaitThread",
54  "WaitChunk_newFileWaitChunkCopying","WaitChunk_newFileWaitChunk"
55  };
56 
57 
// Placeholder path name. Its address is stored in ministate_ whenever a stream
// is reset, and it is the first entry registered in each stream's path encoding.
58  const std::string FastMonitoringService::nopath_ = "NoPath";
59 
// Constructs the service.
//  - Reads the untracked parameters "sleepTime" (default 1), "fastMonIntervals"
//    (default 2) and "filePerFwkStream" (default false) from iPS.
//  - Registers callbacks on the ActivityRegistry.
//  - Locates microstatedef.jsd by probing $CMSSW_BASE, then
//    $CMSSW_RELEASE_BASE, then the bare relative path, and stores the result
//    in microstateDefPath_ and fastMicrostateDefPath_.
// Throws cms::Exception("FastMonitoringService") when the definition file is
// not found in any of the three locations.
// NOTE(review): listing lines 73-103 are mostly absent from this copy;
// presumably they hold further reg.watch* registrations -- verify against the
// real source file.
60  FastMonitoringService::FastMonitoringService(const edm::ParameterSet& iPS,
61  edm::ActivityRegistry& reg) :
62  MicroStateService(iPS,reg)
63  ,encModule_(nReservedModules)
64  ,nStreams_(0)//until initialized
65  ,sleepTime_(iPS.getUntrackedParameter<int>("sleepTime", 1))
66  ,fastMonIntervals_(iPS.getUntrackedParameter<unsigned int>("fastMonIntervals", 2))
67  ,fastName_("fastmoni")
68  ,slowName_("slowmoni")
69  ,filePerFwkStream_(iPS.getUntrackedParameter<bool>("filePerFwkStream", false))
70  ,totalEventsProcessed_(0)
71  {
72  reg.watchPreallocate(this, &FastMonitoringService::preallocate);//receiving information on number of threads
74 
79 
83 
88 
90 
93 
94  reg.watchPreSourceEvent(this,&FastMonitoringService::preSourceEvent);//source (with streamID of requestor)
96 
99 
103 
104  //find microstate definition path (required by the module)
105  struct stat statbuf;
106  std::string microstateBaseSuffix = "src/EventFilter/Utilities/plugins/microstatedef.jsd";
// NOTE(review): getenv() returns a null pointer when the variable is unset, and
// constructing std::string from a null pointer is undefined behaviour. Either
// confirm both variables are always set in the job environment or add a guard.
107  std::string microstatePath = std::string(getenv("CMSSW_BASE")) + "/" + microstateBaseSuffix;
// stat() returns non-zero on failure, so each nested branch means "not found, try the next location".
108  if (stat(microstatePath.c_str(), &statbuf)) {
109  microstatePath = std::string(getenv("CMSSW_RELEASE_BASE")) + "/" + microstateBaseSuffix;
110  if (stat(microstatePath.c_str(), &statbuf)) {
// last resort: the suffix as a path relative to the current working directory
111  microstatePath = microstateBaseSuffix;
112  if (stat(microstatePath.c_str(), &statbuf))
113  throw cms::Exception("FastMonitoringService") << "microstate definition file not found";
114  }
115  }
116  fastMicrostateDefPath_ = microstateDefPath_ = microstatePath;
117  }
118 
119 
121  {
122  }
123 
125  {
127  desc.setComment("Service for File-based DAQ monitoring and event accounting");
128  desc.addUntracked<int> ("sleepTime",1)->setComment("Sleep time of the monitoring thread");
129  desc.addUntracked<unsigned int> ("fastMonIntervals",2)->setComment("Modulo of sleepTime intervals on which fastmon file is written out");
130  desc.addUntracked<bool> ("filePerFwkStream", false)->setComment("Switches on monitoring output per framework stream");
131  desc.setAllowAnything();
132  descriptions.add("FastMonitoringService", desc);
133  }
134 
135 
137  Json::Value legendaVector(Json::arrayValue);
138  for(int i = 0; i < encPath_[0].current_; i++)
139  legendaVector.append(Json::Value(*(static_cast<const std::string *>(encPath_[0].decode(i)))));
140  Json::Value valReserved(nReservedPaths);
141  Json::Value pathLegend;
142  pathLegend["names"]=legendaVector;
143  pathLegend["reserved"]=valReserved;
145  return writer.write(pathLegend);
146  }
147 
149  Json::Value legendaVector(Json::arrayValue);
150  for(int i = 0; i < encModule_.current_; i++)
151  legendaVector.append(Json::Value((static_cast<const edm::ModuleDescription *>(encModule_.decode(i)))->moduleLabel()));
152  Json::Value valReserved(nReservedModules);
153  Json::Value valSpecial(nSpecialModules);
154  Json::Value valOutputModules(nOutputModules_);
155  Json::Value moduleLegend;
156  moduleLegend["names"]=legendaVector;
157  moduleLegend["reserved"]=valReserved;
158  moduleLegend["special"]=valSpecial;
159  moduleLegend["output"]=valOutputModules;
161  return writer.write(moduleLegend);
162  }
163 
165  Json::Value legendaVector(Json::arrayValue);
166  for(int i = 0; i < FastMonitoringThread::inCOUNT; i++)
167  legendaVector.append(Json::Value(inputStateNames[i]));
168  Json::Value moduleLegend;
169  moduleLegend["names"]=legendaVector;
171  return writer.write(moduleLegend);
172  }
173 
175  {
176  nStreams_=bounds.maxNumberOfStreams();
177  nThreads_=bounds.maxNumberOfThreads();
178  //this should already be >=1
179  if (nStreams_==0) nStreams_=1;
180  if (nThreads_==0) nThreads_=1;
181  }
182 
184  edm::ProcessContext const& pc)
185  {
186 
187  // FIND RUN DIRECTORY
188  // The run dir should be set via the configuration of EvFDaqDirector
189 
190  if (edm::Service<evf::EvFDaqDirector>().operator->()==nullptr)
191  {
192  throw cms::Exception("FastMonitoringService") << "EvFDaqDirector is not present";
193 
194  }
195  boost::filesystem::path runDirectory(edm::Service<evf::EvFDaqDirector>()->baseRunDir());
196  workingDirectory_ = runDirectory_ = runDirectory;
197  workingDirectory_ /= "mon";
198 
199  if ( !boost::filesystem::is_directory(workingDirectory_)) {
200  LogDebug("FastMonitoringService") << "<MON> DIR NOT FOUND! Trying to create -: " << workingDirectory_.string() ;
201  boost::filesystem::create_directories(workingDirectory_);
202  if ( !boost::filesystem::is_directory(workingDirectory_))
203  edm::LogWarning("FastMonitoringService") << "Unable to create <MON> DIR -: " << workingDirectory_.string()
204  << ". No monitoring data will be written.";
205  }
206 
207  std::ostringstream fastFileName;
208 
209  fastFileName << fastName_ << "_pid" << std::setfill('0') << std::setw(5) << getpid() << ".fast";
211  fast /= fastFileName.str();
212  fastPath_ = fast.string();
213  if (filePerFwkStream_)
214  for (unsigned int i=0;i<nStreams_;i++) {
215  std::ostringstream fastFileNameTid;
216  fastFileNameTid << fastName_ << "_pid" << std::setfill('0') << std::setw(5) << getpid() << "_tid" << i << ".fast";
218  fastTid /= fastFileNameTid.str();
219  fastPathList_.push_back(fastTid.string());
220  }
221 
222  std::ostringstream moduleLegFile;
223  std::ostringstream moduleLegFileJson;
224  moduleLegFile << "microstatelegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".leg";
225  moduleLegFileJson << "microstatelegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
226  moduleLegendFile_ = (workingDirectory_/moduleLegFile.str()).string();
227  moduleLegendFileJson_ = (workingDirectory_/moduleLegFileJson.str()).string();
228 
229  std::ostringstream pathLegFile;
230  std::ostringstream pathLegFileJson;
231  pathLegFile << "pathlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".leg";
232  pathLegendFile_ = (workingDirectory_/pathLegFile.str()).string();
233  pathLegFileJson << "pathlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
234  pathLegendFileJson_ = (workingDirectory_/pathLegFileJson.str()).string();
235 
236  std::ostringstream inputLegFileJson;
237  inputLegFileJson << "inputlegend_pid" << std::setfill('0') << std::setw(5) << getpid() << ".jsn";
238  inputLegendFileJson_ = (workingDirectory_/inputLegFileJson.str()).string();
239 
240  LogDebug("FastMonitoringService") << "Initializing FastMonitor with microstate def path -: "
242  //<< encPath_.current_ + 1 << " " << encModule_.current_ + 1
243 
244  /*
245  * initialize the fast monitor with:
246  * vector of pointers to monitorable parameters
247  * path to definition
248  *
249  */
250 
252 
253  for(unsigned int i = 0; i < (mCOUNT); i++)
254  encModule_.updateReserved(static_cast<const void*>(reservedMicroStateNames+i));
256 
257  for (unsigned int i=0;i<nStreams_;i++) {
258  ministate_.emplace_back(&nopath_);
260 
261  //for synchronization
262  streamCounterUpdating_.push_back(new std::atomic<bool>(false));
263 
264  //path (mini) state
265  encPath_.emplace_back(0);
266  encPath_[i].update(static_cast<const void*>(&nopath_));
267  eventCountForPathInit_.push_back(0);
268  firstEventId_.push_back(0);
269  collectedPathList_.push_back(new std::atomic<bool>(false));
270 
271  }
272  //for (unsigned int i=0;i<nThreads_;i++)
273  // threadMicrostate_.push_back(&reservedMicroStateNames[mInvalid]);
274 
275  //initial size until we detect number of bins
280 
281  lastGlobalLumi_=0;
282  isInitTransition_=true;
283  lumiFromSource_=0;
284 
285  //startup monitoring
287  fmt_.jsonMonitor_->setNStreams(nStreams_);
289  monInit_.store(false,std::memory_order_release);
291 
292  //this definition needs: #include "tbb/compat/thread"
293  //however this would result in the TBB implementation replacing std::thread
294  //(both supposedly call pthread_self())
295  //number of threads created in process could be obtained from /proc,
296  //assuming that all posix threads are true kernel threads capable of running in parallel
297 
298  //#if TBB_IMPLEMENT_CPP0X
300  //threadIDAvailable_=true;
301  //#endif
302 
303  }
304 
306  {
307  std::string context;
308  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
309  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
310  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
311  edm::LogWarning("FastMonitoringService") << " STREAM " << sc.streamID().value() << " earlyTermination -: ID:"<< sc.eventID()
312  << " LS:" << sc.eventID().luminosityBlock() << " " << context;
313  std::lock_guard<std::mutex> lock(fmt_.monlock_);
314  exceptionInLS_.push_back(sc.eventID().luminosityBlock());
315  }
316 
318  {
319  std::string context;
320  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
321  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
322  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
323  edm::LogWarning("FastMonitoringService") << " GLOBAL " << "earlyTermination -: LS:"
324  << gc.luminosityBlockID().luminosityBlock() << " " << context;
325  std::lock_guard<std::mutex> lock(fmt_.monlock_);
327  }
328 
330  {
331  std::string context;
332  if (to==edm::TerminationOrigin::ExceptionFromThisContext) context = " FromThisContext ";
333  if (to==edm::TerminationOrigin::ExceptionFromAnotherContext) context = " FromAnotherContext";
334  if (to==edm::TerminationOrigin::ExternalSignal) context = " FromExternalSignal";
335  edm::LogWarning("FastMonitoringService") << " SOURCE " << "earlyTermination -: " << context;
336  std::lock_guard<std::mutex> lock(fmt_.monlock_);
337  exception_detected_=true;
338  }
339 
341  if (!ls) exception_detected_=true;
342  else exceptionInLS_.push_back(ls);
343  }
344 
346  {
348  }
349 
350  //new output module name is stream
352  {
353  std::lock_guard<std::mutex> lock(fmt_.monlock_);
354  //std::cout << " Pre module Begin Job module: " << desc.moduleName() << std::endl;
355 
356  //build a map of modules keyed by their module description address
357  //here we need to treat output modules in a special way so they can be easily singled out
358  if(desc.moduleName() == "Stream" || desc.moduleName() == "ShmStreamConsumer" || desc.moduleName() == "EvFOutputModule" ||
359  desc.moduleName() == "EventStreamFileWriter" || desc.moduleName() == "PoolOutputModule") {
360  encModule_.updateReserved((void*)&desc);
361  nOutputModules_++;
362  }
363  else
364  encModule_.update((void*)&desc);
365  }
366 
368  {
369  std::string && moduleLegStrJson = makeModuleLegendaJson();
370  FileIO::writeStringToFile(moduleLegendFileJson_, moduleLegStrJson);
371 
372  std::string inputLegendStrJson = makeInputLegendaJson();
373  FileIO::writeStringToFile(inputLegendFileJson_, inputLegendStrJson);
374 
376 
377  //update number of entries in module histogram
378  std::lock_guard<std::mutex> lock(fmt_.monlock_);
380  }
381 
383  {
385  fmt_.stop();
386  }
387 
389  {
391  isInitTransition_=false;
392  }
393 
395  {
396  timeval lumiStartTime;
397  gettimeofday(&lumiStartTime, nullptr);
398  unsigned int newLumi = gc.luminosityBlockID().luminosityBlock();
399  lastGlobalLumi_ = newLumi;
400 
401  std::lock_guard<std::mutex> lock(fmt_.monlock_);
402  lumiStartTime_[newLumi]=lumiStartTime;
403 
404 
405  }
406 
408  {
409  unsigned int lumi = gc.luminosityBlockID().luminosityBlock();
410  LogDebug("FastMonitoringService") << "Lumi ended. Writing JSON information. LUMI -: "
411  << lumi;
412  timeval lumiStopTime;
413  gettimeofday(&lumiStopTime, nullptr);
414 
415  std::lock_guard<std::mutex> lock(fmt_.monlock_);
416 
417  // Compute throughput
418  timeval stt = lumiStartTime_[lumi];
419  lumiStartTime_.erase(lumi);
420  unsigned long usecondsForLumi = (lumiStopTime.tv_sec - stt.tv_sec)*1000000
421  + (lumiStopTime.tv_usec - stt.tv_usec);
422  unsigned long accuSize = accuSize_.find(lumi)==accuSize_.end() ? 0 : accuSize_[lumi];
423  accuSize_.erase(lumi);
424  double throughput = throughputFactor()* double(accuSize) / double(usecondsForLumi);
425  //store to registered variable
426  fmt_.m_data.fastThroughputJ_.value() = throughput;
427 
428  //update
429  doSnapshot(lumi,true);
430 
431  //retrieve one result we need (todo: sanity check if it's found)
432  IntJ *lumiProcessedJptr = dynamic_cast<IntJ*>(fmt_.jsonMonitor_->getMergedIntJForLumi("Processed",lumi));
433  if (!lumiProcessedJptr)
434  throw cms::Exception("FastMonitoringService") << "Internal error: got null pointer from FastMonitor";
435  processedEventsPerLumi_[lumi] = std::pair<unsigned int,bool>(lumiProcessedJptr->value(),false);
436 
437  //checking if exception has been thrown (in case of Global/Stream early termination, for this LS)
438  bool exception_detected = exception_detected_;
439  for (auto ex : exceptionInLS_)
440  if (lumi == ex) exception_detected=true;
441 
442  if (edm::shutdown_flag || exception_detected) {
443  edm::LogInfo("FastMonitoringService") << "Run interrupted. Skip writing EoL information -: "
444  << processedEventsPerLumi_[lumi].first << " events were processed in LUMI " << lumi;
445  //this will prevent output modules from producing json file for possibly incomplete lumi
446  processedEventsPerLumi_[lumi].first=0;
447  processedEventsPerLumi_[lumi].second=true;
448  //disable this exception, so service can be used standalone (will be thrown if output module asks for this information)
449  //throw cms::Exception("FastMonitoringService") << "SOURCE did not send update for lumi block. LUMI -:" << lumi;
450  return;
451 
452  }
453 
454  if (inputSource_) {
455  auto sourceReport = inputSource_->getEventReport(lumi, true);
456  if (sourceReport.first) {
457  if (sourceReport.second!=processedEventsPerLumi_[lumi].first) {
458  throw cms::Exception("FastMonitoringService") << "MISMATCH with SOURCE update. LUMI -: "
459  << lumi
460  << ", events(processed):" << processedEventsPerLumi_[lumi].first
461  << " events(source):" << sourceReport.second;
462  }
463  }
464  }
465  edm::LogInfo("FastMonitoringService") << "Statistics for lumisection -: lumi = " << lumi << " events = "
466  << lumiProcessedJptr->value() << " time = " << usecondsForLumi/1000000
467  << " size = " << accuSize << " thr = " << throughput;
468  delete lumiProcessedJptr;
469 
470  //full global and stream merge&output for this lumi
471 
472  // create file name for slow monitoring file
473  if (filePerFwkStream_) {
474  std::stringstream slowFileNameStem;
475  slowFileNameStem << slowName_ << "_ls" << std::setfill('0') << std::setw(4)
476  << lumi << "_pid" << std::setfill('0')
477  << std::setw(5) << getpid();
479  slow /= slowFileNameStem.str();
480  fmt_.jsonMonitor_->outputFullJSONs(slow.string(),".jsn",lumi);
481  }
482  else {
483  std::stringstream slowFileName;
484  slowFileName << slowName_ << "_ls" << std::setfill('0') << std::setw(4)
485  << lumi << "_pid" << std::setfill('0')
486  << std::setw(5) << getpid() << ".jsn";
488  slow /= slowFileName.str();
489  fmt_.jsonMonitor_->outputFullJSON(slow.string(),lumi);//full global and stream merge and JSON write for this lumi
490  }
491  fmt_.jsonMonitor_->discardCollected(lumi);//we don't do further updates for this lumi
492 
493  }
494 
496  {
497  std::lock_guard<std::mutex> lock(fmt_.monlock_);
498  unsigned int lumi = gc.luminosityBlockID().luminosityBlock();
499  //LS monitoring snapshot with input source data has been taken in previous callback
500  avgLeadTime_.erase(lumi);
501  filesProcessedDuringLumi_.erase(lumi);
502  lockStatsDuringLumi_.erase(lumi);
503 
504  //output module already used this in end lumi (this could be migrated to EvFDaqDirector as it is essential for FFF bookkeeping)
505  processedEventsPerLumi_.erase(lumi);
506  }
507 
509  {
510  unsigned int sid = sc.streamID().value();
511 
512  std::lock_guard<std::mutex> lock(fmt_.monlock_);
514 
515  //reset collected values for this stream
516  *(fmt_.m_data.processed_[sid])=0;
517 
518  ministate_[sid]=&nopath_;
520  }
521 
523  {
525  }
526 
528  {
529  unsigned int sid = sc.streamID().value();
530  std::lock_guard<std::mutex> lock(fmt_.monlock_);
531 
532  //update processed count to be complete at this time
534  //reset this in case stream does not get notified of next lumi (we keep processed events only)
535  ministate_[sid]=&nopath_;
537  }
539  {
541  }
542 
543 
545  {
546  //make sure that all path names are retrieved before allowing ministate to change
547  //hack: assume memory is synchronized after ~50 events seen by each stream
548  if (UNLIKELY(eventCountForPathInit_[sc.streamID()]<50) && false==collectedPathList_[sc.streamID()]->load(std::memory_order_acquire))
549  {
550  //protection between stream threads, as well as the service monitoring thread
551  std::lock_guard<std::mutex> lock(fmt_.monlock_);
552 
553  if (firstEventId_[sc.streamID()]==0)
554  firstEventId_[sc.streamID()]=sc.eventID().event();
555  if (sc.eventID().event()==firstEventId_[sc.streamID()])
556  {
557  encPath_[sc.streamID()].update((void*)&pc.pathName());
558  return;
559  }
560  else {
561  //finished collecting path names
562  collectedPathList_[sc.streamID()]->store(true,std::memory_order_seq_cst);
563  fmt_.m_data.ministateBins_=encPath_[sc.streamID()].vecsize();
564  if (!pathLegendWritten_) {
565  std::string pathLegendStrJson = makePathLegendaJson();
566  FileIO::writeStringToFile(pathLegendFileJson_, pathLegendStrJson);
567  pathLegendWritten_=true;
568  }
569  }
570  }
571  else {
572  ministate_[sc.streamID()] = &(pc.pathName());
573  }
574  }
575 
576 
578  {
579  }
580 
582  {
584 
585  ministate_[sc.streamID()] = &nopath_;
586 
587  (*(fmt_.m_data.processed_[sc.streamID()]))++;
588  eventCountForPathInit_[sc.streamID()].m_value++;
589 
590  //fast path counter (events accumulated in a run)
591  unsigned long res = totalEventsProcessed_.fetch_add(1,std::memory_order_relaxed);
593  //fmt_.m_data.fastPathProcessedJ_ = totalEventsProcessed_.load(std::memory_order_relaxed);
594  }
595 
597  {
599  }
600 
602  {
604  }
605 
607  {
608  microstate_[sc.streamID().value()] = (void*)(mcc.moduleDescription());
609  }
610 
612  {
613  //microstate_[sc.streamID().value()] = (void*)(mcc.moduleDescription());
615  }
616 
617  //FUNCTIONS CALLED FROM OUTSIDE
618 
619  //this is for old-fashioned service that is not thread safe and can block other streams
620  //(we assume the worst case - everything is blocked)
622  {
623  for (unsigned int i=0;i<nStreams_;i++)
625  }
626 
627  //this is for services that are multithreading-enabled or rarely block other streams
629  {
631  }
632 
633  //from source
// Adds fileSize to the running total kept for the given lumisection
// (accuSize_) and updates the per-lumisection file counter
// (filesProcessedDuringLumi_). Runs under fmt_.monlock_.
// NOTE(review): listing lines 640 and 643 are absent from this copy; they
// presumably hold the "first file for this lumi?" test and the counter
// increment -- verify against the real source file.
634  void FastMonitoringService::accumulateFileSize(unsigned int lumi, unsigned long fileSize) {
635  std::lock_guard<std::mutex> lock(fmt_.monlock_);
636 
// first file seen for this lumisection starts the total, later ones add to it
637  if (accuSize_.find(lumi)==accuSize_.end()) accuSize_[lumi] = fileSize;
638  else accuSize_[lumi] += fileSize;
639 
641  filesProcessedDuringLumi_[lumi] = 1;
642  else
644  }
645 
647  gettimeofday(&fileLookStart_, nullptr);
648  /*
649  std::cout << "Started looking for .raw file at: s=" << fileLookStart_.tv_sec << ": ms = "
650  << fileLookStart_.tv_usec / 1000.0 << std::endl;
651  */
652  }
653 
655  gettimeofday(&fileLookStop_, nullptr);
656  /*
657  std::cout << "Stopped looking for .raw file at: s=" << fileLookStop_.tv_sec << ": ms = "
658  << fileLookStop_.tv_usec / 1000.0 << std::endl;
659  */
660  std::lock_guard<std::mutex> lock(fmt_.monlock_);
661 
662  if (lumi>lumiFromSource_) {
664  leadTimes_.clear();
665  }
666  unsigned long elapsedTime = (fileLookStop_.tv_sec - fileLookStart_.tv_sec) * 1000000 // sec to us
667  + (fileLookStop_.tv_usec - fileLookStart_.tv_usec); // us
668  // add this to lead times for this lumi
669  leadTimes_.push_back((double)elapsedTime);
670 
671  // recompute average lead time for this lumi
672  if (leadTimes_.size() == 1) avgLeadTime_[lumi] = leadTimes_[0];
673  else {
674  double totTime = 0;
675  for (unsigned int i = 0; i < leadTimes_.size(); i++) totTime += leadTimes_[i];
676  avgLeadTime_[lumi] = 0.001*(totTime / leadTimes_.size());
677  }
678  }
679 
680  void FastMonitoringService::reportLockWait(unsigned int ls, double waitTime, unsigned int lockCount)
681  {
682  std::lock_guard<std::mutex> lock(fmt_.monlock_);
683  lockStatsDuringLumi_[ls]=std::pair<double,unsigned int>(waitTime,lockCount);
684 
685  }
686 
687  //for the output module
688  unsigned int FastMonitoringService::getEventsProcessedForLumi(unsigned int lumi, bool * abortFlag) {
689  std::lock_guard<std::mutex> lock(fmt_.monlock_);
690 
691  auto it = processedEventsPerLumi_.find(lumi);
692  if (it!=processedEventsPerLumi_.end()) {
693  unsigned int proc = it->second.first;
694  if (abortFlag) *abortFlag=it->second.second;
695  return proc;
696  }
697  else {
698  throw cms::Exception("FastMonitoringService") << "output module wants already deleted (or never reported by SOURCE) lumisection event count for LUMI -: "<<lumi;
699  return 0;
700  }
701  }
702 
703  //for the output module
705  std::lock_guard<std::mutex> lock(fmt_.monlock_);
706 
707  auto it = processedEventsPerLumi_.find(lumi);
708  if (it!=processedEventsPerLumi_.end()) {
709  unsigned int abortFlag = it->second.second;
710  return abortFlag;
711  }
712  else {
713  throw cms::Exception("FastMonitoringService") << "output module wants already deleted (or never reported by SOURCE) lumisection status for LUMI -: "<<lumi;
714  return false;
715  }
716  }
717 
718  void FastMonitoringService::doSnapshot(const unsigned int ls, const bool isGlobalEOL) {
719  // update macrostate
721 
722  std::vector<const void*> microstateCopy(microstate_.begin(),microstate_.end());
723 
724  if (!isInitTransition_) {
725 
726  auto itd = avgLeadTime_.find(ls);
727  if (itd != avgLeadTime_.end())
728  fmt_.m_data.fastAvgLeadTimeJ_ = itd->second;
729  else fmt_.m_data.fastAvgLeadTimeJ_=0.;
730 
731  auto iti = filesProcessedDuringLumi_.find(ls);
732  if (iti != filesProcessedDuringLumi_.end())
733  fmt_.m_data.fastFilesProcessedJ_ = iti->second;
735 
736  auto itrd = lockStatsDuringLumi_.find(ls);
737  if (itrd != lockStatsDuringLumi_.end()) {
738  fmt_.m_data.fastLockWaitJ_ = itrd->second.first;
739  fmt_.m_data.fastLockCountJ_ = itrd->second.second;
740  }
741  else {
744  }
745  }
746 
747  for (unsigned int i=0;i<nStreams_;i++) {
749  fmt_.m_data.microstateEncoded_[i] = encModule_.encode(microstateCopy[i]);
750  }
751 
752  bool inputStatePerThread=false;
753 
755  switch (inputSupervisorState_) {
758  break;
761  break;
764  break;
767  break;
770  break;
773  break;
776  break;
779  break;
782  break;
785  break;
788  break;
791  break;
794  break;
797  break;
800  break;
801  default:
803  }
804  }
806 
807  switch (inputSupervisorState_) {
810  break;
813  break;
816  break;
819  break;
822  break;
825  break;
828  break;
831  break;
834  break;
837  break;
840  break;
843  break;
846  break;
849  break;
852  break;
853  default:
855  }
856  }
858  inputStatePerThread=true;
859  for (unsigned int i=0;i<nStreams_;i++) {
860  if (microstateCopy[i]==&reservedMicroStateNames[mIdle])
862  else if (microstateCopy[i]==&reservedMicroStateNames[mEoL] ||
863  microstateCopy[i]==&reservedMicroStateNames[mFwkEoL])
865  else
867  }
868  }
870  inputStatePerThread=true;
871  for (unsigned int i=0;i<nStreams_;i++) {
872  if (microstateCopy[i]==&reservedMicroStateNames[mEoL] ||
873  microstateCopy[i]==&reservedMicroStateNames[mFwkEoL])
875  }
876  }
877  else
879 
880  //this is same for all streams
881  if (!inputStatePerThread)
882  for (unsigned int i=1;i<nStreams_;i++)
884 
885  if (isGlobalEOL)
886  {//only update global variables
887  fmt_.jsonMonitor_->snapGlobal(ls);
888  }
889  else
890  fmt_.jsonMonitor_->snap(ls);
891  }
892 
893 } //end namespace evf
894 
#define LogDebug(id)
void prePathEvent(edm::StreamContext const &, edm::PathContext const &)
std::string const & pathName() const
Definition: PathContext.h:37
unsigned int maxNumberOfThreads() const
Definition: SystemBounds.h:46
EventNumber_t event() const
Definition: EventID.h:41
void watchPreStreamEarlyTermination(PreStreamEarlyTermination::slot_type const &iSlot)
void watchPreEvent(PreEvent::slot_type const &iSlot)
unsigned int getEventsProcessedForLumi(unsigned int lumi, bool *abortFlag=0)
static const edm::ModuleDescription reservedMicroStateNames[mCOUNT]
std::vector< ContainableAtomic< const void * > > microstate_
std::atomic< bool > isInitTransition_
void watchPrePathEvent(PrePathEvent::slot_type const &iSlot)
void watchPreallocate(Preallocate::slot_type const &iSlot)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
void setExceptionDetected(unsigned int ls)
boost::filesystem::path runDirectory_
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void preallocate(edm::service::SystemBounds const &)
std::map< unsigned int, timeval > lumiStartTime_
void start(void(FastMonitoringService::*fp)(), FastMonitoringService *cp)
void preGlobalBeginLumi(edm::GlobalContext const &)
std::pair< bool, unsigned int > getEventReport(unsigned int lumi, bool erase)
void setAllowAnything()
allow any parameter label/value pairs
double throughputFactor()
void watchPreModuleEvent(PreModuleEvent::slot_type const &iSlot)
void postGlobalEndLumi(edm::GlobalContext const &)
TrainProcessor *const proc
Definition: MVATrainer.cc:101
void postEvent(edm::StreamContext const &)
void accumulateFileSize(unsigned int lumi, unsigned long fileSize)
void watchPreGlobalEndLumi(PreGlobalEndLumi::slot_type const &iSlot)
std::map< unsigned int, unsigned long > accuSize_
std::atomic< FastMonitoringThread::InputState > inputSupervisorState_
std::vector< std::atomic< bool > * > streamCounterUpdating_
void watchPostEvent(PostEvent::slot_type const &iSlot)
static const int nReservedPaths
volatile std::atomic< bool > shutdown_flag
LuminosityBlockID const & luminosityBlockID() const
Definition: GlobalContext.h:57
void watchPostStreamEndLumi(PostStreamEndLumi::slot_type const &iSlot)
void watchPreGlobalBeginLumi(PreGlobalBeginLumi::slot_type const &iSlot)
std::string const & moduleName() const
void watchPostModuleEvent(PostModuleEvent::slot_type const &iSlot)
Value & append(const Value &value)
Append value to array at the end.
void watchPostSourceEvent(PostSourceEvent::slot_type const &iSlot)
void doStreamEOLSnapshot(const unsigned int ls, const unsigned int streamID)
std::map< unsigned int, unsigned int > filesProcessedDuringLumi_
void preGlobalEndLumi(edm::GlobalContext const &)
Represents a JSON value.
Definition: value.h:111
LuminosityBlockNumber_t luminosityBlock() const
Definition: EventID.h:40
void watchPreStreamEndLumi(PreStreamEndLumi::slot_type const &iSlot)
void preGlobalEarlyTermination(edm::GlobalContext const &, edm::TerminationOrigin)
#define constexpr
void watchPreSourceEarlyTermination(PreSourceEarlyTermination::slot_type const &iSlot)
Definition: Electron.h:6
void watchJobFailure(JobFailure::slot_type const &iSlot)
convenience function for attaching to signal
void registerVariables(jsoncollector::FastMonitor *fm, unsigned int nStreams, unsigned int nThreads)
unsigned int maxNumberOfStreams() const
Definition: SystemBounds.h:43
void preModuleBeginJob(edm::ModuleDescription const &)
static const std::string inputStateNames[FastMonitoringThread::inCOUNT]
void setMicroState(MicroStateService::Microstate) override
void setComment(std::string const &value)
std::vector< std::atomic< bool > * > collectedPathList_
void preStreamEndLumi(edm::StreamContext const &)
std::map< unsigned int, double > avgLeadTime_
void watchPostStreamBeginLumi(PostStreamBeginLumi::slot_type const &iSlot)
void doSnapshot(const unsigned int ls, const bool isGlobalEOL)
void preStreamEarlyTermination(edm::StreamContext const &, edm::TerminationOrigin)
void watchPreGlobalEarlyTermination(PreGlobalEarlyTermination::slot_type const &iSlot)
std::unique_ptr< jsoncollector::FastMonitor > jsonMonitor_
static const std::string nopath_
ModuleDescription const * moduleDescription() const
void resetFastMonitor(std::string const &microStateDefPath, std::string const &fastMicroStateDefPath)
static const int nReservedModules
void watchPostGlobalEndLumi(PostGlobalEndLumi::slot_type const &iSlot)
std::map< unsigned int, std::pair< double, unsigned int > > lockStatsDuringLumi_
std::vector< unsigned long > firstEventId_
void watchPreModuleBeginJob(PreModuleBeginJob::slot_type const &iSlot)
void postModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
std::vector< std::string > fastPathList_
std::vector< ContainableAtomic< unsigned int > > eventCountForPathInit_
void postStreamBeginLumi(edm::StreamContext const &)
std::atomic< FastMonitoringThread::InputState > inputState_
StreamID const & streamID() const
Definition: StreamContext.h:57
def ls(path, rec=False)
Definition: eostools.py:349
void postStreamEndLumi(edm::StreamContext const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::vector< unsigned int > microstateEncoded_
unsigned int value() const
Definition: StreamID.h:46
std::map< unsigned int, std::pair< unsigned int, bool > > processedEventsPerLumi_
void preStreamBeginLumi(edm::StreamContext const &)
std::atomic< unsigned long > totalEventsProcessed_
FedRawDataInputSource * inputSource_
std::atomic< FastMonitoringThread::Macrostate > macrostate_
static const int nSpecialModules
std::vector< ContainableAtomic< const void * > > ministate_
LuminosityBlockNumber_t luminosityBlock() const
void watchPreStreamBeginLumi(PreStreamBeginLumi::slot_type const &iSlot)
void add(std::string const &label, ParameterSetDescription const &psetDescription)
std::vector< double > leadTimes_
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &pc)
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
void stoppedLookingForFile(unsigned int lumi)
std::string write(const Value &root) override
Serialize a Value in JSON format.
boost::filesystem::path workingDirectory_
std::vector< jsoncollector::AtomicMonUInt * > processed_
void postGlobalBeginRun(edm::GlobalContext const &)
void preEvent(edm::StreamContext const &)
void preSourceEarlyTermination(edm::TerminationOrigin)
EventID const & eventID() const
Definition: StreamContext.h:63
void watchPreSourceEvent(PreSourceEvent::slot_type const &iSlot)
bool getAbortFlagForLumi(unsigned int lumi)
void preModuleEvent(edm::StreamContext const &, edm::ModuleCallingContext const &)
Writes a Value in JSON format in a human friendly way.
Definition: writer.h:65
const void * decode(unsigned int index)
std::vector< unsigned int > streamLumi_
std::vector< Encoding > encPath_
std::vector< unsigned int > inputState_
std::vector< unsigned int > ministateEncoded_
void reportLockWait(unsigned int ls, double waitTime, unsigned int lockCount)
std::vector< unsigned int > exceptionInLS_
void watchPostBeginJob(PostBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
array value (ordered list)
Definition: value.h:31
#define UNLIKELY(x)