CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Stopping.cc
Go to the documentation of this file.
1 
7 //#include "EventFilter/ResourceBroker/interface/IPCMethod.h"
9 
10 #include <iostream>
11 #include <vector>
12 #include <sstream>
13 
14 using std::cout;
15 using std::endl;
16 using std::vector;
17 using std::string;
18 using std::ostringstream;
19 using namespace evf::rb_statemachine;
20 
21 // entry action, state notification, state action
22 //______________________________________________________________________________
24 }
25 
27  SharedResourcesPtr_t res = outermost_context().getSharedResources();
28  LOG4CPLUS_INFO(res->log_, "--> ResourceBroker: NEW STATE: " << stateName());
29  outermost_context().setExternallyVisibleState(stateName());
30  outermost_context().setInternalStateName(stateName());
31  // RCMS notification no longer required here
32  // this is done in FUResourceBroker in SOAP reply
33  //outermost_context().rcmsStateChangeNotify();
34 }
35 
37  SharedResourcesPtr_t res = outermost_context().getSharedResources();
38 
39  try {
40  LOG4CPLUS_INFO(res->log_, "Start stopping :) ...");
41  res->resourceStructure_->setStopFlag(true);
42  res->resourceStructure_->shutDownClients();
43  timeval now;
44  timeval then;
45  gettimeofday(&then, 0);
46  while (!res->resourceStructure_->isReadyToShutDown()) {
47  ::usleep(res->resourceStructureTimeout_.value_ * 10);
48  gettimeofday(&now, 0);
49  if ((unsigned int) (now.tv_sec - then.tv_sec)
50  > res->resourceStructureTimeout_.value_ / 10000) {
51  cout << "times: " << now.tv_sec << " " << then.tv_sec << " "
52  << res->resourceStructureTimeout_.value_ / 10000
53  << endl;
54  LOG4CPLUS_WARN(res->log_,
55  "Some Process did not detach - going to Emergency stop! resource status:"
56  << res->resourceStructure_->printStatus() );
57 
62  //try to acquire RS lock
63  int count=5;
64  do {
65  if (!res->tryLockRSAccess()) break;
66  usleep(100000);
67  } while (--count);
68  if (!count) XCEPT_RAISE(evf::Exception,"Can not acquire RS lock for the emergency stop!");
69 
70  emergencyStop();
71 
72  res->unlockRSAccess();
73 
74  break;
75  }
76  }
77 
78  if (res->resourceStructure_->isReadyToShutDown()) {
79  // lock access to I2O discards (data & dqm)
80  res->lockRSAccess();
81 
82  // if emergency stop was not triggered
83  if (res->allowI2ODiscards_) {
84  // any I2O discards after this point are ignored
85  res->allowI2ODiscards_ = false;
86  // UPDATED: release resources
87  res->resourceStructure_->releaseResources();
88  // UPDATED: forget pending allocates to BU
89  res->resourceStructure_->resetPendingAllocates();
90  // UPDATE: reset the underlying IPC method
91  res->resourceStructure_->resetIPC();
92  }
93 
94  res->unlockRSAccess();
95 
96  LOG4CPLUS_INFO(res->log_, "Finished stopping!");
97  EventPtr stopDone(new StopDone());
98  res->commands_.enqEvent(stopDone);
99  }
100  } catch (xcept::Exception &e) {
102  }
103 }
104 
105 /*
106  * I2O capability
107  */
109  SharedResourcesPtr_t res = outermost_context().getSharedResources();
110  bool returnValue = false;
111  try {
112  returnValue = res->resourceStructure_->discardDataEvent(bufRef);
113  } catch (evf::Exception& e) {
115  }
116  return returnValue;
117 }
// Forwards a DQM-event discard to the resource structure held by the shared
// resources of the outermost state-machine context.
//
// bufRef: buffer reference passed through unchanged to
//         resourceStructure_->discardDqmEvent().
// Returns the value reported by resourceStructure_->discardDqmEvent();
// returnValue starts out as false and is only overwritten when that call
// returns.
118 bool Stopping::discardDqmEvent(MemRef_t* bufRef) const {
119  SharedResourcesPtr_t res = outermost_context().getSharedResources();
120  bool returnValue = false;
121  try {
122  returnValue = res->resourceStructure_->discardDqmEvent(bufRef);
123  //returnValue = res->resourceStructure_->discardDqmEventWhileHalting(bufRef);
124  } catch (evf::Exception& e) {
// NOTE(review): the body of this handler is not visible in this listing --
// confirm against the real file what happens to the caught evf::Exception.
// If the handler does not rethrow, the function falls through and returns
// the initial false.
126  }
127  return returnValue;
128 }
129 
130 // construction / destruction
131 //______________________________________________________________________________
// Constructs the Stopping state: hands the context `c` to the my_base
// constructor and then calls safeEntryAction().
132 Stopping::Stopping(my_context c) :
133  my_base(c) {
134  safeEntryAction();
135 }
136 
138  safeExitAction();
139 }
140 
142  SharedResourcesPtr_t res = outermost_context().getSharedResources();
143  IPCMethod* resourceStructure = res->resourceStructure_;
144 
145  LOG4CPLUS_WARN(res->log_, "in Emergency stop - handle non-clean stops");
146 
147  // UPDATE: while in emergency stop I2O discards from SM are not allowed
148  // they are re-allowed after a new enable
149  res->allowI2ODiscards_ = false;
150  {
151  #ifdef linux
152  auto lk = resourceStructure->lockCrashHandlerTimed(10);
153  #else
154  bool lk=true;
155  #endif
156  if (lk) {
157  vector < pid_t > client_prc_ids = resourceStructure->clientPrcIds();
158  for (UInt_t i = 0; i < client_prc_ids.size(); i++) {
159  pid_t pid = client_prc_ids[i];
160  cout << "B: killing process " << i << " pid= " << pid << endl;
161  if (pid != 0) {
162  //assume processes are dead by now
163  if (!resourceStructure->handleCrashedEP(res->runNumber_, pid))
164  res->nbTimeoutsWithoutEvent_++;
165  else
166  res->nbTimeoutsWithEvent_++;
167  }
168  }
169  }
170  else {
171  XCEPT_RAISE(evf::Exception,
172  "Timed out accessing the EP Crash Handler in emergency stop. SM discards not arriving?");
173  }
174  }
175  LOG4CPLUS_WARN(res->log_, "in Emergency stop - running lastResort");
176  resourceStructure->lastResort();
177  ::sleep(1);
178  if (!resourceStructure->isReadyToShutDown()) {
179  UInt_t shutdownStatus = resourceStructure->shutdownStatus();
180  std::ostringstream ostr;
181  ostr << "EmergencyStop: failed to shut down ResourceTable. Debug info mask:" << std::hex << shutdownStatus;
182  res->reasonForFailed_ = ostr.str();
183  XCEPT_RAISE(evf::Exception, res->reasonForFailed_);
184  }
185 
186  res->printWorkLoopStatus();
187  res->lock();
188 
189  LOG4CPLUS_WARN(res->log_, "Deleting the resource structure!");
190  delete res->resourceStructure_;
191  res->resourceStructure_ = 0;
192 
193  cout << "cycle through resourcetable config " << endl;
194  res->configureResources(outermost_context().getApp());
195  res->unlock();
196  if (res->shmInconsistent_)
197  XCEPT_RAISE(evf::Exception, "Inconsistent shm state");
198  cout << "done with emergency stop" << endl;
199 }
200 
201 // exit action, state name, move to failed state
202 //______________________________________________________________________________
204 }
205 
// Returns the name of this state, the fixed string "Stopping".
206 string Stopping::do_stateName() const {
207  return std::string("Stopping");
208 }
209 
211  SharedResourcesPtr_t res = outermost_context().getSharedResources();
212  res->reasonForFailed_ = exception.what();
213  LOG4CPLUS_FATAL(res->log_,
214  "Moving to FAILED state! Reason: " << exception.what());
215  EventPtr fail(new Fail());
216  res->commands_.enqEvent(fail);
217 }
virtual std::vector< pid_t > clientPrcIds() const =0
int i
Definition: DBlmapReader.cc:9
virtual void lastResort()=0
boost::shared_ptr< SharedResources > SharedResourcesPtr_t
toolbox::mem::Reference MemRef_t
Definition: FUTypes.h:10
reject
Definition: HLTenums.h:23
boost::shared_ptr< boost::statechart::event_base > EventPtr
Definition: CommandQueue.h:23
void sleep(Duration_t)
Definition: Utils.h:163
virtual void do_moveToFailedState(xcept::Exception &exception) const
Definition: Stopping.cc:210
virtual bool discardDataEvent(MemRef_t *bufRef) const
Definition: Stopping.cc:108
virtual void do_stateNotify()
Definition: Stopping.cc:26
bool isReadyToShutDown() const
Definition: IPCMethod.h:186
virtual bool discardDqmEvent(MemRef_t *bufRef) const
Definition: Stopping.cc:118
virtual bool handleCrashedEP(UInt_t runNumber, pid_t pid)=0
void moveToFailedState(xcept::Exception &exception) const
Definition: BaseState.cc:35
unsigned int UInt_t
Definition: FUTypes.h:12
virtual void do_entryActionWork()
Definition: Stopping.cc:23
virtual std::string do_stateName() const
Definition: Stopping.cc:206
UInt_t shutdownStatus()
Definition: IPCMethod.h:194
virtual void do_exitActionWork()
Definition: Stopping.cc:203
virtual void do_stateAction() const
Definition: Stopping.cc:36
tuple cout
Definition: gather_cfg.py:121
std::string stateName() const
Definition: BaseState.cc:31