38 for (
const auto& buff : buffers) {
39 if (buff !=
nullptr) {
40 totalSize += buff->bufferSize();
44 auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize,
stream);
45 auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize,
stream);
54 for (
size_t fedi = 0; fedi < buffers.size(); ++fedi) {
55 auto& buff = buffers[fedi];
56 if (buff !=
nullptr) {
57 const auto raw = rawdata[fedi];
58 memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
63 mode = buff->readoutMode();
65 if (buff->readoutMode() !=
mode) {
66 throw cms::Exception(
"[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
67 << buff->readoutMode() <<
" != " <<
mode;
77 throw cms::Exception(
"[SiStripRawToClusterGPUKernel] unsupported readout mode ") <<
mode;
81 auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(),
stream);
82 auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(),
stream);
86 for (
size_t i = 0;
i < detmap.size(); ++
i) {
87 const auto& detp = detmap[
i];
88 const auto fedId = detp.fedID();
89 const auto fedCh = detp.fedCh();
94 const auto& channel =
buffer->channel(detp.fedCh());
96 auto len = channel.length();
97 auto off = channel.offset();
99 assert(len >= headerlen || len == 0);
101 if (len >= headerlen) {
106 chanlocs->setChannelLoc(
i, channel.data(), off,
offset, len,
fedId, fedCh, detp.detID());
112 inputGPU[
i] =
nullptr;
116 const auto n_strips =
offset;
134 auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips,
stream);
138 constexpr
int xor3bits = 7;
139 for (
size_t i = 0;
i < chanlocs->size(); ++
i) {
140 const auto data = chanlocs->input(
i);
141 const auto len = chanlocs->length(
i);
143 if (
data !=
nullptr && len > 0) {
144 auto aoff = chanlocs->offset(
i);
145 auto choff = chanlocs->inoff(
i);
146 const auto end = choff + len;
148 while (choff <
end) {
150 const auto groupLength =
data[choff++ ^ xor3bits];
152 for (
auto k = 0;
k < groupLength; ++
k, ++choff, ++aoff) {
153 if (
data[choff ^ xor3bits] != outdata[aoff]) {
154 LogDebug(
"SiStripRawToClusterGPUKernel")
155 <<
"Strip mismatch " <<
stripIndex <<
" i:k " <<
i <<
":" <<
k <<
" " 156 << (uint32_t)
data[choff ^ xor3bits] <<
" != " << (uint32_t)outdata[aoff] << std::endl;
162 outdata.reset(
nullptr);
165 fedRawDataGPU.reset();
static constexpr fedId_t invalidFed
static const uint16_t NUMBER_OF_FEDS
void allocateSSTDataGPU(int max_strips, cudaStream_t stream)
std::unique_ptr< ChannelLocsGPU > chanlocsGPU_
void findClusterGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
std::vector< stripgpu::fedId_t > fedIndex_
__host__ __device__ std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip)
void unpackChannelsGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
cms::cuda::host::unique_ptr< StripDataView > sst_data_d_
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
std::vector< size_t > fedRawDataOffsets_
static constexpr detId_t invalidDet
constexpr auto kMaxSeedStrips
SiStripClustersCUDADevice clusters_d_
char data[epos_bytes_allocation]
#define cudaCheck(ARG,...)
__host__ __device__ fedId_t fedIndex(fedId_t fed)
void setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
std::unique_ptr< StripDataGPU > stripdata_