1 #ifndef HeterogeneousCore_CUDAUtilities_interface_prefixScan_h 2 #define HeterogeneousCore_CUDAUtilities_interface_prefixScan_h 45 template <
typename VT,
typename T>
62 warpPrefixScan(ci,
co,
i,
mask);
78 co[
i] +=
ws[warpId - 1];
83 for (uint32_t
i = 1;
i <
size; ++
i)
106 warpPrefixScan(
c,
i,
mask);
108 auto warpId =
i / 32;
121 auto warpId =
i / 32;
122 c[
i] +=
ws[warpId - 1];
126 for (uint32_t
i = 1;
i <
size; ++
i)
135 asm volatile(
"mov.u32 %0, %dynamic_smem_size;" :
"=r"(
ret));
141 template <
typename T>
142 __global__ void multiBlockPrefixScan(
T const* ici,
T* ico, int32_t
size, int32_t* pc) {
143 volatile T const* ci = ici;
144 volatile T*
co = ico;
156 __shared__
bool isLastBlockDone;
165 if (!isLastBlockDone)
173 extern __shared__
T psum[];
179 blockPrefixScan(psum, psum,
gridDim.x,
ws);
189 #endif // HeterogeneousCore_CUDAUtilities_interface_prefixScan_h
ret
prodAgent to be discontinued
__host__ __device__ VT * co
Namespace of DDCMS conversion namespace.
__host__ __device__ VT uint32_t size
T1 atomicAdd(T1 *a, T2 b)