CMS 3D CMS Logo

ScopedContext.cc
Go to the documentation of this file.
2 
8 
9 #include "chooseDevice.h"
10 
11 namespace {
12  struct CallbackData {
14  int device;
15  };
16 
17  void CUDART_CB cudaScopedContextCallback(cudaStream_t streamId, cudaError_t status, void* data) {
18  std::unique_ptr<CallbackData> guard{reinterpret_cast<CallbackData*>(data)};
19  edm::WaitingTaskWithArenaHolder& waitingTaskHolder = guard->holder;
20  int device = guard->device;
21  if (status == cudaSuccess) {
22  LogTrace("ScopedContext") << " GPU kernel finished (in callback) device " << device << " CUDA stream "
23  << streamId;
24  waitingTaskHolder.doneWaiting(nullptr);
25  } else {
26  // wrap the exception in a try-catch block to let GDB "catch throw" break on it
27  try {
28  auto error = cudaGetErrorName(status);
29  auto message = cudaGetErrorString(status);
30  throw cms::Exception("CUDAError") << "Callback of CUDA stream " << streamId << " in device " << device
31  << " error " << error << ": " << message;
32  } catch (cms::Exception&) {
33  waitingTaskHolder.doneWaiting(std::current_exception());
34  }
35  }
36  }
37 } // namespace
38 
39 namespace cms::cuda {
40  namespace impl {
41  ScopedContextBase::ScopedContextBase(edm::StreamID streamID) : currentDevice_(chooseDevice(streamID)) {
42  cudaCheck(cudaSetDevice(currentDevice_));
44  }
45 
46  ScopedContextBase::ScopedContextBase(const ProductBase& data) : currentDevice_(data.device()) {
47  cudaCheck(cudaSetDevice(currentDevice_));
48  if (data.mayReuseStream()) {
49  stream_ = data.streamPtr();
50  } else {
52  }
53  }
54 
56  : currentDevice_(device), stream_(std::move(stream)) {
57  cudaCheck(cudaSetDevice(currentDevice_));
58  }
59 
61 
63  cudaStream_t dataStream,
64  bool available,
65  cudaEvent_t dataEvent) {
66  if (dataDevice != device()) {
67  // Eventually replace with prefetch to current device (assuming unified memory works)
68  // If we won't go to unified memory, need to figure out something else...
69  throw cms::Exception("LogicError") << "Handling data from multiple devices is not yet supported";
70  }
71 
72  if (dataStream != stream()) {
73  // Different streams, need to synchronize
74  if (not available) {
75  // Event not yet occurred, so need to add synchronization
76  // here. Sychronization is done by making the CUDA stream to
77  // wait for an event, so all subsequent work in the stream
78  // will run only after the event has "occurred" (i.e. data
79  // product became available).
80  cudaCheck(cudaStreamWaitEvent(stream(), dataEvent, 0), "Failed to make a stream to wait for an event");
81  }
82  }
83  }
84 
85  void ScopedContextHolderHelper::enqueueCallback(int device, cudaStream_t stream) {
86  cudaCheck(
87  cudaStreamAddCallback(stream, cudaScopedContextCallback, new CallbackData{waitingTaskHolder_, device}, 0));
88  }
89  } // namespace impl
90 
92 
95  if (contextState_) {
97  }
98  }
99 
101  throw cms::Exception("LogicError")
102  << "Calling ScopedContextAcquire::insertNextTask() requires ScopedContextAcquire to be constructed with "
103  "ContextState, but that was not the case";
104  }
105 
107 
109  // Intentionally not checking the return value to avoid throwing
110  // exceptions. If this call would fail, we should get failures
111  // elsewhere as well.
112  cudaEventRecord(event_.get(), stream());
113  }
114 
116 
118 } // namespace cms::cuda
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
#define LogTrace(id)
void doneWaiting(std::exception_ptr iExcept)
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35
int chooseDevice(edm::StreamID id)
Definition: chooseDevice.cc:8
impl::ScopedContextHolderHelper holderHelper_
SharedStreamPtr get()
Definition: StreamCache.cc:20
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90
void set(int device, SharedStreamPtr stream)
Definition: ContextState.h:30
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
StreamCache & getStreamCache()
Definition: StreamCache.cc:39
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
impl::ScopedContextHolderHelper holderHelper_
void enqueueCallback(int device, cudaStream_t stream)
def move(src, dest)
Definition: eostools.py:511
ScopedContextBase(edm::StreamID streamID)