CMS 3D CMS Logo

CachedBufAlloc.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
2 #define HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
3 
4 #include <alpaka/alpaka.hpp>
5 
8 
9 namespace cms::alpakatools {
10 
11  namespace traits {
12 
14  template <typename TElem,
15  typename TDim,
16  typename TIdx,
17  typename TDev,
18  typename TQueue,
19  typename = void,
20  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
21  struct CachedBufAlloc {
22  static_assert(alpaka::meta::DependentFalseType<TDev>::value, "This device does not support a caching allocator");
23  };
24 
26  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
27  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, TQueue, void> {
28  template <typename TExtent>
29  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev, TQueue queue, TExtent const& extent)
30  -> alpaka::BufCpu<TElem, TDim, TIdx> {
31  // non-cached, queue-ordered asynchronous host-only memory
32  return alpaka::allocAsyncBuf<TElem, TIdx>(queue, extent);
33  }
34  };
35 
36 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
37 
39  template <typename TElem, typename TDim, typename TIdx>
40  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtBlocking, void> {
41  template <typename TExtent>
42  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
43  alpaka::QueueCudaRtBlocking queue,
44  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
45  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
46 
47  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtBlocking>();
48 
49  // FIXME the BufCpu does not support a pitch ?
50  size_t size = alpaka::getExtentProduct(extent);
51  size_t sizeBytes = size * sizeof(TElem);
52  void* memPtr = allocator.allocate(sizeBytes, queue);
53 
54  // use a custom deleter to return the buffer to the CachingAllocator
55  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
56 
57  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
58  }
59  };
60 
62  template <typename TElem, typename TDim, typename TIdx>
63  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
64  template <typename TExtent>
65  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
66  alpaka::QueueCudaRtNonBlocking queue,
67  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
68  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
69 
70  auto& allocator = getHostCachingAllocator<alpaka::QueueCudaRtNonBlocking>();
71 
72  // FIXME the BufCpu does not support a pitch ?
73  size_t size = alpaka::getExtentProduct(extent);
74  size_t sizeBytes = size * sizeof(TElem);
75  void* memPtr = allocator.allocate(sizeBytes, queue);
76 
77  // use a custom deleter to return the buffer to the CachingAllocator
78  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
79 
80  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
81  }
82  };
83 
85  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
86  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCudaRt, TQueue, void> {
87  template <typename TExtent>
88  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCudaRt const& dev, TQueue queue, TExtent const& extent)
89  -> alpaka::BufCudaRt<TElem, TDim, TIdx> {
90  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
91 
92  auto& allocator = getDeviceCachingAllocator<alpaka::DevCudaRt, TQueue>(dev);
93 
94  size_t width = alpaka::getWidth(extent);
95  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
96  // TODO implement pitch for TDim > 1
97  size_t pitchBytes = widthBytes;
98  size_t size = alpaka::getExtentProduct(extent);
99  size_t sizeBytes = size * sizeof(TElem);
100  void* memPtr = allocator.allocate(sizeBytes, queue);
101 
102  // use a custom deleter to return the buffer to the CachingAllocator
103  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
104 
105  return alpaka::BufCudaRt<TElem, TDim, TIdx>(
106  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
107  }
108  };
109 
110 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
111 
112 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
113 
115  template <typename TElem, typename TDim, typename TIdx>
116  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtBlocking, void> {
117  template <typename TExtent>
118  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
119  alpaka::QueueHipRtBlocking queue,
120  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
121  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
122 
123  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtBlocking>();
124 
125  // FIXME the BufCpu does not support a pitch ?
126  size_t size = alpaka::getExtentProduct(extent);
127  size_t sizeBytes = size * sizeof(TElem);
128  void* memPtr = allocator.allocate(sizeBytes, queue);
129 
130  // use a custom deleter to return the buffer to the CachingAllocator
131  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
132 
133  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
134  }
135  };
136 
138  template <typename TElem, typename TDim, typename TIdx>
139  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
140  template <typename TExtent>
141  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
142  alpaka::QueueHipRtNonBlocking queue,
143  TExtent const& extent) -> alpaka::BufCpu<TElem, TDim, TIdx> {
144  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
145 
146  auto& allocator = getHostCachingAllocator<alpaka::QueueHipRtNonBlocking>();
147 
148  // FIXME the BufCpu does not support a pitch ?
149  size_t size = alpaka::getExtentProduct(extent);
150  size_t sizeBytes = size * sizeof(TElem);
151  void* memPtr = allocator.allocate(sizeBytes, queue);
152 
153  // use a custom deleter to return the buffer to the CachingAllocator
154  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
155 
156  return alpaka::BufCpu<TElem, TDim, TIdx>(dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent);
157  }
158  };
159 
161  template <typename TElem, typename TDim, typename TIdx, typename TQueue>
162  struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevHipRt, TQueue, void> {
163  template <typename TExtent>
164  ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent)
165  -> alpaka::BufHipRt<TElem, TDim, TIdx> {
166  ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
167 
168  auto& allocator = getDeviceCachingAllocator<alpaka::DevHipRt, TQueue>(dev);
169 
170  size_t width = alpaka::getWidth(extent);
171  size_t widthBytes = width * static_cast<TIdx>(sizeof(TElem));
172  // TODO implement pitch for TDim > 1
173  size_t pitchBytes = widthBytes;
174  size_t size = alpaka::getExtentProduct(extent);
175  size_t sizeBytes = size * sizeof(TElem);
176  void* memPtr = allocator.allocate(sizeBytes, queue);
177 
178  // use a custom deleter to return the buffer to the CachingAllocator
179  auto deleter = [alloc = &allocator](TElem* ptr) { alloc->free(ptr); };
180 
181  return alpaka::BufHipRt<TElem, TDim, TIdx>(
182  dev, reinterpret_cast<TElem*>(memPtr), std::move(deleter), extent, pitchBytes);
183  }
184  };
185 
186 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
187 
188  } // namespace traits
189 
190  template <typename TElem,
191  typename TIdx,
192  typename TExtent,
193  typename TQueue,
194  typename TDev,
195  typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
196  ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
197  return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
198  }
199 
200 } // namespace cms::alpakatools
201 
202 #endif // HeterogeneousCore_AlpakaInterface_interface_CachedBufAlloc_h
size
Write out results.
ALPAKA_FN_HOST auto allocCachedBuf(TDev const &dev, TQueue queue, TExtent const &extent=TExtent())
TEMPL(T2) struct Divides void
Definition: Factorize.h:24
The caching memory allocator trait.
def move(src, dest)
Definition: eostools.py:511
static ALPAKA_FN_HOST auto allocCachedBuf(alpaka::DevCpu const &dev, TQueue queue, TExtent const &extent) -> alpaka::BufCpu< TElem, TDim, TIdx >