17 void CUDART_CB cudaScopedContextCallback(cudaStream_t streamId, cudaError_t
status,
void*
data) {
18 std::unique_ptr<CallbackData> guard{
reinterpret_cast<CallbackData*
>(
data)};
20 int device = guard->device;
21 if (status == cudaSuccess) {
22 LogTrace(
"ScopedContext") <<
" GPU kernel finished (in callback) device " << device <<
" CUDA stream "
28 auto error = cudaGetErrorName(status);
29 auto message = cudaGetErrorString(status);
30 throw cms::Exception(
"CUDAError") <<
"Callback of CUDA stream " << streamId <<
" in device " << device
31 <<
" error " <<
error <<
": " << message;
33 waitingTaskHolder.
doneWaiting(std::current_exception());
56 : currentDevice_(device), stream_(std::
move(stream)) {
63 cudaStream_t dataStream,
65 cudaEvent_t dataEvent) {
66 if (dataDevice !=
device()) {
69 throw cms::Exception(
"LogicError") <<
"Handling data from multiple devices is not yet supported";
72 if (dataStream !=
stream()) {
80 cudaCheck(cudaStreamWaitEvent(
stream(), dataEvent, 0),
"Failed to make a stream to wait for an event");
87 cudaStreamAddCallback(stream, cudaScopedContextCallback,
new CallbackData{
waitingTaskHolder_, device}, 0));
102 <<
"Calling ScopedContextAcquire::insertNextTask() requires ScopedContextAcquire to be constructed with "
103 "ContextState, but that was not the case";
void synchronizeStreams(int dataDevice, cudaStream_t dataStream, bool available, cudaEvent_t dataEvent)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
const SharedStreamPtr & streamPtr() const
ContextState * contextState_
std::shared_ptr< std::remove_pointer_t< cudaStream_t >> SharedStreamPtr
void doneWaiting(std::exception_ptr iExcept)
int chooseDevice(edm::StreamID id)
impl::ScopedContextHolderHelper holderHelper_
~ScopedContextProduce()
Record the CUDA event, all asynchronous work must have been queued before the destructor.
edm::WaitingTaskWithArenaHolder waitingTaskHolder_
void set(int device, SharedStreamPtr stream)
const SharedStreamPtr & streamPtr() const
cudaStream_t stream() const
char data[epos_bytes_allocation]
StreamCache & getStreamCache()
#define cudaCheck(ARG,...)
impl::ScopedContextHolderHelper holderHelper_
void enqueueCallback(int device, cudaStream_t stream)
ScopedContextBase(edm::StreamID streamID)
bool mayReuseStream() const