CMS 3D CMS Logo

cudaCompat.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
2 #define HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
3 
4 /*
5  * Everything you need to run cuda code in plain sequential c++ code
6  */
7 
8 #ifndef __CUDACC__
9 
10 #include <algorithm>
11 #include <cstdint>
12 #include <cstring>
13 
14 // include the CUDA runtime header to define some of the attributes, types and symbols also on the CPU
15 #include <cuda_runtime.h>
16 
17 // make sure functions are inlined to avoid multiple definitions
// This section is only compiled when __CUDACC__ is not defined. Each CUDA qualifier is
// first #undef'ed (presumably because <cuda_runtime.h> already defines it - TODO confirm)
// and then redefined as a plain "inline" plus the always_inline function attribute.
18 #undef __global__
19 #define __global__ inline __attribute__((always_inline))
20 
// __forceinline__ receives exactly the same replacement as __global__.
21 #undef __forceinline__
22 #define __forceinline__ inline __attribute__((always_inline))
23 
24 namespace cms {
25  namespace cudacompat {
26 
27  // run serially with a single thread
28  // 1-dimensional block
29  const dim3 threadIdx = {0, 0, 0};
30  const dim3 blockDim = {1, 1, 1};
31  // 1-dimensional grid
32  const dim3 blockIdx = {0, 0, 0};
33  const dim3 gridDim = {1, 1, 1};
34 
// Serial compare-and-swap: if *address equals compare, val is stored into *address;
// otherwise *address keeps its value.
// Returns the value *address held on entry.
template <typename T1, typename T2>
T1 atomicCAS(T1* address, T1 compare, T2 val) {
  T1 const previous = *address;
  if (previous == compare) {
    *address = val;
  }
  return previous;
}
41 
// Block-scoped compare-and-swap; simply forwards to atomicCAS and returns its result.
template <typename T1, typename T2>
T1 atomicCAS_block(T1* address, T1 compare, T2 val) {
  T1 const previous = atomicCAS(address, compare, val);
  return previous;
}
46 
// Serial stand-in for CUDA's atomicInc.
// Stores ((old >= b) ? 0 : old + 1) into *a: the counter is incremented while it is
// below the limit b and wraps back to zero once the limit has been reached, which is
// the behaviour the CUDA programming guide specifies for the device intrinsic.
// Returns the value *a held before the update.
template <typename T1, typename T2>
T1 atomicInc(T1* a, T2 b) {
  auto const previous = *a;
  // wrap to zero at the limit instead of leaving the counter stuck at b
  *a = (previous < T1(b)) ? T1(previous + 1) : T1(0);
  return previous;
}
54 
// Block-scoped increment; simply forwards to atomicInc and returns its result.
template <typename T1, typename T2>
T1 atomicInc_block(T1* a, T2 b) {
  T1 const previous = atomicInc(a, b);
  return previous;
}
59 
// Serial fetch-and-add: adds b to *a in place.
// Returns the value *a held before the addition.
template <typename T1, typename T2>
T1 atomicAdd(T1* a, T2 b) {
  auto const previous = *a;
  *a += b;
  return previous;
}
66 
// Block-scoped fetch-and-add; simply forwards to atomicAdd and returns its result.
template <typename T1, typename T2>
T1 atomicAdd_block(T1* a, T2 b) {
  T1 const previous = atomicAdd(a, b);
  return previous;
}
71 
// Serial fetch-and-subtract: subtracts b from *a in place.
// Returns the value *a held before the subtraction.
template <typename T1, typename T2>
T1 atomicSub(T1* a, T2 b) {
  auto const previous = *a;
  *a -= b;
  return previous;
}
78 
// Block-scoped fetch-and-subtract; simply forwards to atomicSub and returns its result.
template <typename T1, typename T2>
T1 atomicSub_block(T1* a, T2 b) {
  T1 const previous = atomicSub(a, b);
  return previous;
}
83 
// Serial fetch-and-min: b is converted to T1 and *a is lowered to it when it is smaller.
// Returns the value *a held before the update.
template <typename T1, typename T2>
T1 atomicMin(T1* a, T2 b) {
  auto const previous = *a;
  auto const candidate = T1(b);
  if (candidate < previous) {
    *a = candidate;
  }
  return previous;
}
90 
// Block-scoped fetch-and-min; simply forwards to atomicMin and returns its result.
template <typename T1, typename T2>
T1 atomicMin_block(T1* a, T2 b) {
  T1 const previous = atomicMin(a, b);
  return previous;
}
95 
// Serial fetch-and-max: b is converted to T1 and *a is raised to it when it is larger.
// Returns the value *a held before the update.
template <typename T1, typename T2>
T1 atomicMax(T1* a, T2 b) {
  auto const previous = *a;
  auto const candidate = T1(b);
  if (previous < candidate) {
    *a = candidate;
  }
  return previous;
}
102 
// Block-scoped fetch-and-max; simply forwards to atomicMax and returns its result.
template <typename T1, typename T2>
T1 atomicMax_block(T1* a, T2 b) {
  T1 const previous = atomicMax(a, b);
  return previous;
}
107 
// Host replacement for the block barrier: an empty function.
inline void __syncthreads() {
}
// Host replacement for the memory fence: an empty function.
inline void __threadfence() {
}
// Host replacement for the "barrier + OR of the predicate": hands the caller's
// predicate back unchanged.
inline bool __syncthreads_or(bool x) {
  bool const predicate = x;
  return predicate;
}
// Host replacement for the "barrier + AND of the predicate": hands the caller's
// predicate back unchanged.
inline bool __syncthreads_and(bool x) {
  bool const predicate = x;
  return predicate;
}
// Host replacement for the read-only load intrinsic: a plain read through the pointer.
// Returns a copy of the object x points to.
template <typename T>
inline T __ldg(T const* x) {
  return x[0];
}
116 
117  } // namespace cudacompat
118 } // namespace cms
119 
120 // make the cudacompat implementation available in the global namespace
121 using namespace cms::cudacompat;
122 
123 #endif // __CUDACC__
124 
125 #endif // HeterogeneousCore_CUDAUtilities_interface_cudaCompat_h
const dim3 threadIdx
Definition: cudaCompat.h:29
T1 atomicMax(T1 *a, T2 b)
Definition: cudaCompat.h:97
bool __syncthreads_or(bool x)
Definition: cudaCompat.h:110
const dim3 gridDim
Definition: cudaCompat.h:33
T1 atomicCAS(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:36
T1 atomicSub(T1 *a, T2 b)
Definition: cudaCompat.h:73
ret
prodAgent to be discontinued
const dim3 blockDim
Definition: cudaCompat.h:30
T1 atomicSub_block(T1 *a, T2 b)
Definition: cudaCompat.h:80
T1 atomicInc(T1 *a, T2 b)
Definition: cudaCompat.h:48
T1 atomicCAS_block(T1 *address, T1 compare, T2 val)
Definition: cudaCompat.h:43
const dim3 blockIdx
Definition: cudaCompat.h:32
Namespace of DDCMS conversion namespace.
T1 atomicMin_block(T1 *a, T2 b)
Definition: cudaCompat.h:92
T __ldg(T const *x)
Definition: cudaCompat.h:113
T1 atomicInc_block(T1 *a, T2 b)
Definition: cudaCompat.h:56
double b
Definition: hdecay.h:118
T1 atomicAdd_block(T1 *a, T2 b)
Definition: cudaCompat.h:68
void __syncthreads()
Definition: cudaCompat.h:108
double a
Definition: hdecay.h:119
float x
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:111
T1 atomicMin(T1 *a, T2 b)
Definition: cudaCompat.h:85
long double T
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
void __threadfence()
Definition: cudaCompat.h:109
T1 atomicMax_block(T1 *a, T2 b)
Definition: cudaCompat.h:104