CMS 3D CMS Logo

ScopedContext.cc
Go to the documentation of this file.
2 
9 
10 #include "chooseDevice.h"
11 
12 namespace cms::cuda {
13  namespace impl {
14  ScopedContextBase::ScopedContextBase(edm::StreamID streamID) : currentDevice_(chooseDevice(streamID)) {
15  cudaCheck(cudaSetDevice(currentDevice_));
17  }
18 
19  ScopedContextBase::ScopedContextBase(const ProductBase& data) : currentDevice_(data.device()) {
20  cudaCheck(cudaSetDevice(currentDevice_));
21  if (data.mayReuseStream()) {
22  stream_ = data.streamPtr();
23  } else {
25  }
26  }
27 
29  : currentDevice_(device), stream_(std::move(stream)) {
30  cudaCheck(cudaSetDevice(currentDevice_));
31  }
32 
34 
36  cudaStream_t dataStream,
37  bool available,
38  cudaEvent_t dataEvent) {
39  if (dataDevice != device()) {
40  // Eventually replace with prefetch to current device (assuming unified memory works)
41  // If we won't go to unified memory, need to figure out something else...
42  throw cms::Exception("LogicError") << "Handling data from multiple devices is not yet supported";
43  }
44 
45  if (dataStream != stream()) {
46  // Different streams, need to synchronize
47  if (not available) {
48  // Event not yet occurred, so need to add synchronization
49  // here. Sychronization is done by making the CUDA stream to
50  // wait for an event, so all subsequent work in the stream
51  // will run only after the event has "occurred" (i.e. data
52  // product became available).
53  cudaCheck(cudaStreamWaitEvent(stream(), dataEvent, 0), "Failed to make a stream to wait for an event");
54  }
55  }
56  }
57 
58  void ScopedContextHolderHelper::enqueueCallback(int device, cudaStream_t stream) {
60  SharedEventPtr event = getEventCache().get();
61  cudaCheck(cudaEventRecord(event.get(), stream));
62  async->runAsync(
64  [event = std::move(event)]() mutable { cudaCheck(cudaEventSynchronize(event.get())); },
65  []() { return "Enqueued by cms::cuda::ScopedContextHolderHelper::enqueueCallback()"; });
66  }
67  } // namespace impl
68 
70 
73  if (contextState_) {
75  }
76  }
77 
79  throw cms::Exception("LogicError")
80  << "Calling ScopedContextAcquire::insertNextTask() requires ScopedContextAcquire to be constructed with "
81  "ContextState, but that was not the case";
82  }
83 
85 
87  // Intentionally not checking the return value to avoid throwing
88  // exceptions. If this call would fail, we should get failures
89  // elsewhere as well.
90  cudaEventRecord(event_.get(), stream());
91  }
92 
94 
96 } // namespace cms::cuda
std::shared_ptr< std::remove_pointer_t< cudaEvent_t > > SharedEventPtr
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
std::shared_ptr< std::remove_pointer_t< cudaStream_t > > SharedStreamPtr
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
SharedEventPtr get()
Definition: EventCache.cc:21
void runAsync(WaitingTaskWithArenaHolder holder, F &&func, G &&errorContextFunc)
Definition: Async.h:21
const SharedStreamPtr & streamPtr() const
Definition: ScopedContext.h:35
int chooseDevice(edm::StreamID id)
Definition: chooseDevice.cc:8
impl::ScopedContextHolderHelper holderHelper_
SharedStreamPtr get()
Definition: StreamCache.cc:20
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
Definition: ScopedContext.h:90
void set(int device, SharedStreamPtr stream)
Definition: ContextState.h:30
~ScopedContextAcquire() noexcept(false)
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
StreamCache & getStreamCache()
Definition: StreamCache.cc:39
EventCache & getEventCache()
Definition: EventCache.cc:66
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
impl::ScopedContextHolderHelper holderHelper_
void enqueueCallback(int device, cudaStream_t stream)
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1
ScopedContextBase(edm::StreamID streamID)