#include <TestCUDAProducerGPUKernel.h>
Public Member Functions | |
cms::cuda::device::unique_ptr<float[]> | runAlgo (const std::string &label, cudaStream_t stream) const |
cms::cuda::device::unique_ptr<float[]> | runAlgo (const std::string &label, const float *d_input, cudaStream_t stream) const |
void | runSimpleAlgo (float *d_data, cudaStream_t stream) const |
TestCUDAProducerGPUKernel ()=default | |
~TestCUDAProducerGPUKernel ()=default | |
Static Public Attributes | |
static constexpr int | NUM_VALUES = 4000 |
This class models the actual CUDA implementation of an algorithm.
Memory is allocated dynamically with the allocator in cms::cuda.
The algorithm is intended to waste time with large matrix operations so that the asynchronous nature of the CUDA integration becomes visible with debug prints.
Definition at line 19 of file TestCUDAProducerGPUKernel.h.
TestCUDAProducerGPUKernel::TestCUDAProducerGPUKernel | ( | ) |
default |
TestCUDAProducerGPUKernel::~TestCUDAProducerGPUKernel | ( | ) |
default |
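cms::cuda::device::unique_ptr<float[]> TestCUDAProducerGPUKernel::runAlgo | ( | const std::string & | label, |
cudaStream_t | stream | ||
) | const |
inline |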
Definition at line 27 of file TestCUDAProducerGPUKernel.h.
Referenced by TestCUDAProducerGPUEW::acquire(), TestCUDAProducerGPUEWTask::acquire(), TestCUDAProducerGPUFirst::produce(), and TestCUDAProducerGPU::produce().
cms::cuda::device::unique_ptr<float[]> TestCUDAProducerGPUKernel::runAlgo | ( | const std::string & | label, |
const float * | d_input, | ||
cudaStream_t | stream | ||
) | const |
void TestCUDAProducerGPUKernel::runSimpleAlgo | ( | float * | d_data, |
cudaStream_t | stream | ||
) | const |
Referenced by TestCUDAProducerGPUEWTask::addSimpleWork().
constexpr int TestCUDAProducerGPUKernel::NUM_VALUES = 4000 |
static |
Definition at line 21 of file TestCUDAProducerGPUKernel.h.
Referenced by TestCUDAProducerGPUtoCPU::acquire(), and TestCUDAProducerGPUtoCPU::produce().