1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 4 #include <alpaka/alpaka.hpp> 14 template <
typename TElem,
20 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
// Host-side allocation entry point for alpaka::DevCpu, generic over the queue type TQueue.
// NOTE(review): the struct header that the first template parameter list
// (TElem, TDim, TIdx, TQueue) belongs to is not visible in this listing; by analogy with
// the other specialisations in this file it is presumably a CachedBufAlloc partial
// specialisation for alpaka::DevCpu - confirm against the original header.
26 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
// Member template: TExtent is the type of the extent describing the buffer size.
28 template <
typename TExtent>
29 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev, TQueue
queue, TExtent
const& extent)
30 -> alpaka::BufCpu<TElem, TDim, TIdx> {
// No caching allocator is consulted on this path: the request is forwarded to
// alpaka::allocAsyncBuf with the queue and the extent. `dev` is not used in the body.
32 return alpaka::allocAsyncBuf<TElem, TIdx>(
queue, extent);
// Specialisation of CachedBufAlloc for host (alpaka::DevCpu) buffers that are requested
// through a blocking CUDA queue. The preprocessor guard on the next line restricts this
// code to builds where ALPAKA_ACC_GPU_CUDA_ENABLED is defined.
36 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 39 template <
typename TElem,
typename TDim,
typename TIdx>
40 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtBlocking,
void> {
// Allocate a host buffer of `extent` elements of TElem.
//   dev    - host device passed on to the returned BufCpu
//   queue  - queue forwarded to the allocator's allocate() call
//   extent - buffer extent, in elements
// Returns an alpaka::BufCpu wrapping the allocated memory.
41 template <
typename TExtent>
42 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
43 alpaka::QueueCudaRtBlocking
queue,
44 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
45 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Host caching allocator selected by the queue type.
47 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
// Requested size in bytes: number of elements in the extent times the element size.
50 size_t size = alpaka::getExtentProduct(extent);
51 size_t sizeBytes = size *
sizeof(TElem);
52 void* memPtr = allocator.allocate(sizeBytes,
queue);
// The deleter captures the allocator's address and returns the memory via allocator.free().
55 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
// Hand the raw pointer, the deleter and the extent to the alpaka CPU buffer.
57 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
62 template <
typename TElem,
typename TDim,
typename TIdx>
63 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking,
void> {
64 template <
typename TExtent>
65 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
66 alpaka::QueueCudaRtNonBlocking
queue,
67 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
68 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
70 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
73 size_t size = alpaka::getExtentProduct(extent);
74 size_t sizeBytes =
size *
sizeof(TElem);
75 void* memPtr = allocator.allocate(sizeBytes,
queue);
78 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
80 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
85 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
86 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCudaRt, TQueue,
void> {
87 template <
typename TExtent>
88 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCudaRt
const& dev, TQueue
queue, TExtent
const& extent)
89 -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
90 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
92 auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
94 size_t width = alpaka::getWidth(extent);
95 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
97 size_t pitchBytes = widthBytes;
98 size_t size = alpaka::getExtentProduct(extent);
99 size_t sizeBytes =
size *
sizeof(TElem);
100 void* memPtr = allocator.allocate(sizeBytes,
queue);
103 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
105 return alpaka::BufCudaRt<TElem, TDim, TIdx>(
106 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent, pitchBytes);
// End of the CUDA-only section and start of the HIP-only section (both directives are on
// the next line), followed by the specialisation of CachedBufAlloc for host
// (alpaka::DevCpu) buffers that are requested through a blocking HIP queue.
110 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 112 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 115 template <
typename TElem,
typename TDim,
typename TIdx>
116 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtBlocking,
void> {
// Allocate a host buffer of `extent` elements of TElem.
//   dev    - host device passed on to the returned BufCpu
//   queue  - queue forwarded to the allocator's allocate() call
//   extent - buffer extent, in elements
// Returns an alpaka::BufCpu wrapping the allocated memory.
117 template <
typename TExtent>
118 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
119 alpaka::QueueHipRtBlocking
queue,
120 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
121 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Host caching allocator selected by the queue type.
123 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
// Requested size in bytes: number of elements in the extent times the element size.
126 size_t size = alpaka::getExtentProduct(extent);
127 size_t sizeBytes =
size *
sizeof(TElem);
128 void* memPtr = allocator.allocate(sizeBytes,
queue);
// The deleter captures the allocator's address and returns the memory via allocator.free().
131 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
// Hand the raw pointer, the deleter and the extent to the alpaka CPU buffer.
133 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
138 template <
typename TElem,
typename TDim,
typename TIdx>
139 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtNonBlocking,
void> {
140 template <
typename TExtent>
141 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
142 alpaka::QueueHipRtNonBlocking
queue,
143 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
144 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
146 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
149 size_t size = alpaka::getExtentProduct(extent);
150 size_t sizeBytes =
size *
sizeof(TElem);
151 void* memPtr = allocator.allocate(sizeBytes,
queue);
154 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
156 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
161 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
162 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevHipRt, TQueue,
void> {
163 template <
typename TExtent>
164 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevHipRt
const& dev, TQueue
queue, TExtent
const& extent)
165 -> alpaka::BufHipRt<TElem, TDim, TIdx> {
166 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
168 auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
170 size_t width = alpaka::getWidth(extent);
171 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
173 size_t pitchBytes = widthBytes;
174 size_t size = alpaka::getExtentProduct(extent);
175 size_t sizeBytes =
size *
sizeof(TElem);
176 void* memPtr = allocator.allocate(sizeBytes,
queue);
179 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
181 return alpaka::BufHipRt<TElem, TDim, TIdx>(
182 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent, pitchBytes);
186 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 190 template <
typename TElem,
195 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
202 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
TEMPL(T2) struct Divides void