src
HeterogeneousTest
ROCmDevice
src
DeviceAddition.hip.cc
Go to the documentation of this file.
1
#include <cstddef>
2
#include <cstdint>
3
4
#include <hip/hip_runtime.h>
5
6
#include "
HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h
"
7
8
namespace
cms::rocmtest
{
9
10
__device__
void
add_vectors_f
(
const
float
* __restrict__ in1,
11
const
float
* __restrict__ in2,
12
float
* __restrict__
out
,
13
size_t
size
) {
14
uint32_t thread =
threadIdx
.x +
blockIdx
.x *
blockDim
.x;
15
uint32_t
stride
=
blockDim
.x *
gridDim
.x;
16
17
for
(
size_t
i
= thread;
i
<
size
;
i
+=
stride
) {
18
out
[
i
] = in1[
i
] + in2[
i
];
19
}
20
}
21
22
__device__
void
add_vectors_d
(
const
double
* __restrict__ in1,
23
const
double
* __restrict__ in2,
24
double
* __restrict__
out
,
25
size_t
size
) {
26
uint32_t thread =
threadIdx
.x +
blockIdx
.x *
blockDim
.x;
27
uint32_t
stride
=
blockDim
.x *
gridDim
.x;
28
29
for
(
size_t
i
= thread;
i
<
size
;
i
+=
stride
) {
30
out
[
i
] = in1[
i
] + in2[
i
];
31
}
32
}
33
34
}
// namespace cms::rocmtest
cms::cudacompat::threadIdx
const dim3 threadIdx
Definition:
cudaCompat.h:29
findQualityFiles.size
size
Write out results.
Definition:
findQualityFiles.py:443
cms::rocmtest::add_vectors_f
__device__ void add_vectors_f(const float *__restrict__ in1, const float *__restrict__ in2, float *__restrict__ out, size_t size)
Definition:
DeviceAddition.hip.cc:10
mps_fire.i
i
Definition:
mps_fire.py:429
cms::cudacompat::gridDim
const dim3 gridDim
Definition:
cudaCompat.h:33
cms::cudacompat::blockDim
const dim3 blockDim
Definition:
cudaCompat.h:30
cms::cudacompat::blockIdx
const dim3 blockIdx
Definition:
cudaCompat.h:32
MillePedeFileConverter_cfg.out
out
Definition:
MillePedeFileConverter_cfg.py:31
cms::rocmtest::add_vectors_d
__device__ void add_vectors_d(const double *__restrict__ in1, const double *__restrict__ in2, double *__restrict__ out, size_t size)
Definition:
DeviceAddition.hip.cc:22
cms::rocmtest
Definition:
requireDevices.h:8
DeviceAddition.h
gpuPixelDoublets::stride
auto stride
Definition:
gpuPixelDoubletsAlgos.h:170
__device__
#define __device__
Definition:
SiPixelGainForHLTonGPU.h:15
Generated for CMSSW Reference Manual by Doxygen 1.8.14