CMS 3D CMS Logo

SiStripRawToClusterGPUKernel.cc
Go to the documentation of this file.
6 
8 
10 #include "ChannelLocsGPU.h"
11 #include "StripDataView.h"
12 
13 namespace stripgpu {
14  StripDataGPU::StripDataGPU(size_t size, cudaStream_t stream) {
15  alldataGPU_ = cms::cuda::make_device_unique<uint8_t[]>(size, stream);
16  channelGPU_ = cms::cuda::make_device_unique<uint16_t[]>(size, stream);
17  stripIdGPU_ = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(size, stream);
18  }
19 
21  : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),
22  channelThreshold_(conf.getParameter<double>("ChannelThreshold")),
23  seedThreshold_(conf.getParameter<double>("SeedThreshold")),
24  clusterThresholdSquared_(std::pow(conf.getParameter<double>("ClusterThreshold"), 2.0f)),
25  maxSequentialHoles_(conf.getParameter<unsigned>("MaxSequentialHoles")),
26  maxSequentialBad_(conf.getParameter<unsigned>("MaxSequentialBad")),
27  maxAdjacentBad_(conf.getParameter<unsigned>("MaxAdjacentBad")),
28  maxClusterSize_(conf.getParameter<unsigned>("MaxClusterSize")),
29  minGoodCharge_(clusterChargeCut(conf)) {
31  }
32 
33  void SiStripRawToClusterGPUKernel::makeAsync(const std::vector<const FEDRawData*>& rawdata,
34  const std::vector<std::unique_ptr<sistrip::FEDBuffer>>& buffers,
36  cudaStream_t stream) {
37  size_t totalSize{0};
38  for (const auto& buff : buffers) {
39  if (buff != nullptr) {
40  totalSize += buff->bufferSize();
41  }
42  }
43 
44  auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize, stream);
45  auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize, stream);
46 
47  size_t off = 0;
48  fedRawDataOffsets_.clear();
49  fedIndex_.clear();
51 
53 
54  for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
55  auto& buff = buffers[fedi];
56  if (buff != nullptr) {
57  const auto raw = rawdata[fedi];
58  memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
60  fedRawDataOffsets_.push_back(off);
61  off += raw->size();
62  if (fedRawDataOffsets_.size() == 1) {
63  mode = buff->readoutMode();
64  } else {
65  if (buff->readoutMode() != mode) {
66  throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
67  << buff->readoutMode() << " != " << mode;
68  }
69  }
70  }
71  }
72  // send rawdata to GPU
73  cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);
74 
75  const auto& detmap = conditions.detToFeds();
77  throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
78  }
79  const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
80  size_t offset = 0;
81  auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(), stream);
82  auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(), stream);
83 
84  // iterate over the detector in DetID/APVPair order
85  // mapping out where the data are
86  for (size_t i = 0; i < detmap.size(); ++i) {
87  const auto& detp = detmap[i];
88  const auto fedId = detp.fedID();
89  const auto fedCh = detp.fedCh();
90  const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];
91 
92  if (fedi != invalidFed) {
93  const auto buffer = buffers[fedId].get();
94  const auto& channel = buffer->channel(detp.fedCh());
95 
96  auto len = channel.length();
97  auto off = channel.offset();
98 
99  assert(len >= headerlen || len == 0);
100 
101  if (len >= headerlen) {
102  len -= headerlen;
103  off += headerlen;
104  }
105 
106  chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
107  inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
108  offset += len;
109 
110  } else {
111  chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
112  inputGPU[i] = nullptr;
113  }
114  }
115 
116  const auto n_strips = offset;
117 
118  sst_data_d_ = cms::cuda::make_host_unique<StripDataView>(stream);
119  sst_data_d_->nStrips = n_strips;
120 
121  chanlocsGPU_ = std::make_unique<ChannelLocsGPU>(detmap.size(), stream);
122  chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);
123 
124  stripdata_ = std::make_unique<StripDataGPU>(n_strips, stream);
125 
126  const auto& condGPU = conditions.getGPUProductAsync(stream);
127 
128  unpackChannelsGPU(condGPU.deviceView(), stream);
129 #ifdef GPU_CHECK
130  cudaCheck(cudaStreamSynchronize(stream));
131 #endif
132 
133 #ifdef EDM_ML_DEBUG
134  auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips, stream);
135  cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
136  cudaCheck(cudaStreamSynchronize(stream));
137 
138  constexpr int xor3bits = 7;
139  for (size_t i = 0; i < chanlocs->size(); ++i) {
140  const auto data = chanlocs->input(i);
141  const auto len = chanlocs->length(i);
142 
143  if (data != nullptr && len > 0) {
144  auto aoff = chanlocs->offset(i);
145  auto choff = chanlocs->inoff(i);
146  const auto end = choff + len;
147 
148  while (choff < end) {
149  const auto stripIndex = data[choff++ ^ xor3bits];
150  const auto groupLength = data[choff++ ^ xor3bits];
151  aoff += 2;
152  for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
153  if (data[choff ^ xor3bits] != outdata[aoff]) {
154  LogDebug("SiStripRawToClusterGPUKernel")
155  << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
156  << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
157  }
158  }
159  }
160  }
161  }
162  outdata.reset(nullptr);
163 #endif
164 
165  fedRawDataGPU.reset();
166  allocateSSTDataGPU(n_strips, stream);
167  setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
168 
170  findClusterGPU(condGPU.deviceView(), stream);
171 
172  stripdata_.reset();
173  }
174 
176  reset();
177 
178  return std::move(clusters_d_);
179  }
180 
182  chanlocsGPU_.reset();
183  sst_data_d_.reset();
184  }
185 } // namespace stripgpu
static constexpr fedId_t invalidFed
Definition: SiStripTypes.h:15
static const uint16_t NUMBER_OF_FEDS
void allocateSSTDataGPU(int max_strips, cudaStream_t stream)
std::unique_ptr< ChannelLocsGPU > chanlocsGPU_
void makeAsync(const std::vector< const FEDRawData *> &rawdata, const std::vector< std::unique_ptr< sistrip::FEDBuffer >> &buffers, const SiStripClusterizerConditionsGPU &conditions, cudaStream_t stream)
cms::cuda::device::unique_ptr< stripgpu::stripId_t[]> stripIdGPU_
void findClusterGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
StripDataGPU(size_t size, cudaStream_t stream)
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t stream
assert(be >=bs)
sistrip classes
cms::cuda::device::unique_ptr< uint8_t[]> alldataGPU_
__host__ __device__ std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip)
SiStripClustersCUDADevice getResults(cudaStream_t stream)
void unpackChannelsGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
cms::cuda::host::unique_ptr< StripDataView > sst_data_d_
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20
double f[11][100]
static constexpr detId_t invalidDet
Definition: SiStripTypes.h:14
constexpr auto kMaxSeedStrips
Definition: StripDataView.h:12
cms::cuda::device::unique_ptr< uint16_t[]> channelGPU_
float clusterChargeCut(const edm::ParameterSet &conf, const char *name="clusterChargeCut")
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69
SiStripRawToClusterGPUKernel(const edm::ParameterSet &conf)
__host__ __device__ fedId_t fedIndex(fedId_t fed)
Power< A, B >::type pow(const A &a, const B &b)
Definition: Power.h:29
def move(src, dest)
Definition: eostools.py:511
void setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)
#define LogDebug(id)