1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 4 #include <alpaka/alpaka.hpp> 14 template <
typename TElem,
20 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
26 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
28 template <
typename TExtent>
31 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
33 return alpaka::allocAsyncBuf<TElem, TIdx>(
queue, extent);
37 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 40 template <
typename TElem,
typename TDim,
typename TIdx>
41 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtBlocking,
void> {
42 template <
typename TExtent>
43 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
44 alpaka::QueueCudaRtBlocking
queue,
45 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
46 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
48 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
51 size_t size = alpaka::getExtentProduct(extent);
52 size_t sizeBytes = size *
sizeof(TElem);
53 void* memPtr = allocator.allocate(sizeBytes,
queue);
56 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
58 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
63 template <
typename TElem,
typename TDim,
typename TIdx>
64 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking,
void> {
65 template <
typename TExtent>
66 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
67 alpaka::QueueCudaRtNonBlocking
queue,
68 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
69 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
71 auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
74 size_t size = alpaka::getExtentProduct(extent);
75 size_t sizeBytes =
size *
sizeof(TElem);
76 void* memPtr = allocator.allocate(sizeBytes,
queue);
79 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
81 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
86 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
87 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCudaRt, TQueue,
void> {
88 template <
typename TExtent>
89 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCudaRt
const& dev,
91 TExtent
const& extent) -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
92 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
94 auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
96 size_t width = alpaka::getWidth(extent);
97 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
99 size_t pitchBytes = widthBytes;
100 size_t size = alpaka::getExtentProduct(extent);
101 size_t sizeBytes =
size *
sizeof(TElem);
102 void* memPtr = allocator.allocate(sizeBytes,
queue);
105 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
107 return alpaka::BufCudaRt<TElem, TDim, TIdx>(
108 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent, pitchBytes);
112 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 114 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 117 template <
typename TElem,
typename TDim,
typename TIdx>
118 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtBlocking,
void> {
119 template <
typename TExtent>
120 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
121 alpaka::QueueHipRtBlocking
queue,
122 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
123 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
125 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
128 size_t size = alpaka::getExtentProduct(extent);
129 size_t sizeBytes =
size *
sizeof(TElem);
130 void* memPtr = allocator.allocate(sizeBytes,
queue);
133 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
135 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
140 template <
typename TElem,
typename TDim,
typename TIdx>
141 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtNonBlocking,
void> {
142 template <
typename TExtent>
143 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
144 alpaka::QueueHipRtNonBlocking
queue,
145 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
146 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
148 auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
151 size_t size = alpaka::getExtentProduct(extent);
152 size_t sizeBytes =
size *
sizeof(TElem);
153 void* memPtr = allocator.allocate(sizeBytes,
queue);
156 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
158 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
163 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
164 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevHipRt, TQueue,
void> {
165 template <
typename TExtent>
166 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevHipRt
const& dev,
168 TExtent
const& extent) -> alpaka::BufHipRt<TElem, TDim, TIdx> {
169 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
171 auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
173 size_t width = alpaka::getWidth(extent);
174 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
176 size_t pitchBytes = widthBytes;
177 size_t size = alpaka::getExtentProduct(extent);
178 size_t sizeBytes =
size *
sizeof(TElem);
179 void* memPtr = allocator.allocate(sizeBytes,
queue);
182 auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
184 return alpaka::BufHipRt<TElem, TDim, TIdx>(
185 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent, pitchBytes);
189 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 193 template <
typename TElem,
198 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
205 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
TEMPL(T2) struct Divides void