1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h 4 #include <alpaka/alpaka.hpp> 15 template <
typename TElem,
21 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
// Template header and member function for the alpaka::DevCpu allocation path
// with an arbitrary queue type TQueue.
// NOTE(review): the struct header that should sit between the two template
// parameter lists, and the closing braces, are absent from this excerpt —
// confirm against the complete header before relying on these notes.
27 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
// allocCachedBuf: takes the CPU device, a queue (by value) and an extent, and
// returns an alpaka::BufCpu<TElem, TDim, TIdx>.
29 template <
typename TExtent>
30 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev, TQueue
queue, TExtent
const& extent)
31 -> alpaka::BufCpu<TElem, TDim, TIdx> {
// Forwards to alpaka::allocAsyncBuf with the queue and extent; `dev` is not
// referenced in the visible body.
33 return alpaka::allocAsyncBuf<TElem, TIdx>(
queue, extent);
// Partial specialization of CachedBufAlloc for alpaka::DevCpu paired with
// alpaka::QueueCudaRtBlocking. The first line below also carries the
// ALPAKA_ACC_GPU_CUDA_ENABLED guard that opens the CUDA-only section.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
37 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED 40 template <
typename TElem,
typename TDim,
typename TIdx>
41 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtBlocking,
void> {
// allocCachedBuf: takes the CPU device, a blocking CUDA queue (by value) and
// an extent, and returns an alpaka::BufCpu<TElem, TDim, TIdx>.
42 template <
typename TExtent>
43 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
44 alpaka::QueueCudaRtBlocking
queue,
45 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
46 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getHostCachingAllocator for this queue type.
// Its use is not visible in this excerpt — presumably it produces `memPtr`
// on the missing lines; TODO confirm.
48 auto&
allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
// Element count taken from the extent, then converted to a byte count.
51 size_t size = alpaka::getExtentProduct(extent);
52 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufCpu from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer) and the extent.
58 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
// Partial specialization of CachedBufAlloc for alpaka::DevCpu paired with
// alpaka::QueueCudaRtNonBlocking. The visible statements mirror the
// QueueCudaRtBlocking specialization, differing only in the queue type.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
63 template <
typename TElem,
typename TDim,
typename TIdx>
64 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking,
void> {
// allocCachedBuf: takes the CPU device, a non-blocking CUDA queue (by value)
// and an extent, and returns an alpaka::BufCpu<TElem, TDim, TIdx>.
65 template <
typename TExtent>
66 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
67 alpaka::QueueCudaRtNonBlocking
queue,
68 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
69 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getHostCachingAllocator for this queue type.
// Its use is not visible in this excerpt — presumably it produces `memPtr`
// on the missing lines; TODO confirm.
71 auto&
allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
// Element count taken from the extent, then converted to a byte count.
74 size_t size = alpaka::getExtentProduct(extent);
75 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufCpu from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer) and the extent.
81 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
// Partial specialization of CachedBufAlloc for the CUDA device
// (alpaka::DevCudaRt) with an arbitrary queue type TQueue.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
86 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
87 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCudaRt, TQueue,
void> {
// allocCachedBuf: takes the CUDA device, a queue (by value) and an extent,
// and returns an alpaka::BufCudaRt<TElem, TDim, TIdx>.
88 template <
typename TExtent>
89 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCudaRt
const& dev, TQueue
queue, TExtent
const& extent)
90 -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
91 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getDeviceCachingAllocator for this device and
// queue type. Its use is not visible in this excerpt — presumably it
// produces `memPtr` on the missing lines; TODO confirm.
93 auto&
allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
// Row width in bytes: the extent's width times sizeof(TElem), where the
// sizeof value is cast to TIdx before the multiplication.
95 size_t width = alpaka::getWidth(extent);
96 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
// The pitch handed to the buffer is set equal to the row width in bytes.
98 size_t pitchBytes = widthBytes;
// Element count taken from the extent, then converted to a byte count.
99 size_t size = alpaka::getExtentProduct(extent);
100 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufCudaRt from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer), the pitch and the extent.
106 return alpaka::BufCudaRt<TElem, TDim, TIdx>(
107 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), pitchBytes, extent);
// Partial specialization of CachedBufAlloc for alpaka::DevCpu paired with
// alpaka::QueueHipRtBlocking. The first line below carries both the end of
// the CUDA-only section and the ALPAKA_ACC_GPU_HIP_ENABLED guard text that
// opens the HIP-only section.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
111 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED 113 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED 116 template <
typename TElem,
typename TDim,
typename TIdx>
117 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtBlocking,
void> {
// allocCachedBuf: takes the CPU device, a blocking HIP queue (by value) and
// an extent, and returns an alpaka::BufCpu<TElem, TDim, TIdx>.
118 template <
typename TExtent>
119 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
120 alpaka::QueueHipRtBlocking
queue,
121 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
122 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getHostCachingAllocator for this queue type.
// Its use is not visible in this excerpt — presumably it produces `memPtr`
// on the missing lines; TODO confirm.
124 auto&
allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
// Element count taken from the extent, then converted to a byte count.
127 size_t size = alpaka::getExtentProduct(extent);
128 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufCpu from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer) and the extent.
134 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
// Partial specialization of CachedBufAlloc for alpaka::DevCpu paired with
// alpaka::QueueHipRtNonBlocking. The visible statements mirror the
// QueueHipRtBlocking specialization, differing only in the queue type.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
139 template <
typename TElem,
typename TDim,
typename TIdx>
140 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevCpu, alpaka::QueueHipRtNonBlocking,
void> {
// allocCachedBuf: takes the CPU device, a non-blocking HIP queue (by value)
// and an extent, and returns an alpaka::BufCpu<TElem, TDim, TIdx>.
141 template <
typename TExtent>
142 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevCpu
const& dev,
143 alpaka::QueueHipRtNonBlocking
queue,
144 TExtent
const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
145 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getHostCachingAllocator for this queue type.
// Its use is not visible in this excerpt — presumably it produces `memPtr`
// on the missing lines; TODO confirm.
147 auto&
allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
// Element count taken from the extent, then converted to a byte count.
150 size_t size = alpaka::getExtentProduct(extent);
151 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufCpu from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer) and the extent.
157 return alpaka::BufCpu<TElem, TDim, TIdx>(dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), extent);
// Partial specialization of CachedBufAlloc for the HIP device
// (alpaka::DevHipRt) with an arbitrary queue type TQueue.
// NOTE(review): this excerpt has gaps — the statements that define `memPtr`
// and `deleter`, and the closing braces, are not visible here; confirm
// against the complete header.
162 template <
typename TElem,
typename TDim,
typename TIdx,
typename TQueue>
163 struct CachedBufAlloc<TElem, TDim, TIdx,
alpaka::DevHipRt, TQueue,
void> {
// allocCachedBuf: takes the HIP device, a queue (by value) and an extent,
// and returns an alpaka::BufHipRt<TElem, TDim, TIdx>.
164 template <
typename TExtent>
165 ALPAKA_FN_HOST
static auto allocCachedBuf(alpaka::DevHipRt
const& dev, TQueue
queue, TExtent
const& extent)
166 -> alpaka::BufHipRt<TElem, TDim, TIdx> {
167 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
// Allocator looked up via getDeviceCachingAllocator for this device and
// queue type. Its use is not visible in this excerpt — presumably it
// produces `memPtr` on the missing lines; TODO confirm.
169 auto&
allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
// Row width in bytes: the extent's width times sizeof(TElem), where the
// sizeof value is cast to TIdx before the multiplication.
171 size_t width = alpaka::getWidth(extent);
172 size_t widthBytes =
width *
static_cast<TIdx
>(
sizeof(TElem));
// The pitch handed to the buffer is set equal to the row width in bytes.
174 size_t pitchBytes = widthBytes;
// Element count taken from the extent, then converted to a byte count.
175 size_t size = alpaka::getExtentProduct(extent);
176 size_t sizeBytes =
size *
sizeof(TElem);
// Build the BufHipRt from the device, the pointer reinterpreted as TElem*,
// the deleter (moved into the buffer), the pitch and the extent.
182 return alpaka::BufHipRt<TElem, TDim, TIdx>(
183 dev,
reinterpret_cast<TElem*
>(memPtr),
std::move(
deleter), pitchBytes, extent);
187 #endif // ALPAKA_ACC_GPU_HIP_ENABLED 191 template <
typename TElem,
196 typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
203 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
TEMPL(T2) struct Divides void