#include <SiStripRawToClusterGPUKernel.h>

Public Member Functions
void	copyAsync (cudaStream_t stream)

SiStripClustersCUDADevice	getResults (cudaStream_t stream)

void	makeAsync (const std::vector< const FEDRawData *> &rawdata, const std::vector< std::unique_ptr< sistrip::FEDBuffer >> &buffers, const SiStripClusterizerConditionsGPU &conditions, cudaStream_t stream)

	SiStripRawToClusterGPUKernel (const edm::ParameterSet &conf)

Private Types
using	ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView

Private Member Functions
void	allocateSSTDataGPU (int max_strips, cudaStream_t stream)

void	findClusterGPU (const ConditionsDeviceView *conditions, cudaStream_t stream)

void	freeSSTDataGPU (cudaStream_t stream)

void	reset ()

void	setSeedStripsNCIndexGPU (const ConditionsDeviceView *conditions, cudaStream_t stream)

void	unpackChannelsGPU (const ConditionsDeviceView *conditions, cudaStream_t stream)

Private Attributes
std::unique_ptr< ChannelLocsGPU >	chanlocsGPU_

float	channelThreshold_

SiStripClustersCUDADevice	clusters_d_

float	clusterThresholdSquared_

std::vector< stripgpu::fedId_t >	fedIndex_

std::vector< size_t >	fedRawDataOffsets_

uint8_t	maxAdjacentBad_

uint32_t	maxClusterSize_

uint8_t	maxSequentialBad_

uint8_t	maxSequentialHoles_

float	minGoodCharge_

cms::cuda::device::unique_ptr< StripDataView >	pt_sst_data_d_

float	seedThreshold_

cms::cuda::host::unique_ptr< StripDataView >	sst_data_d_

std::unique_ptr< StripDataGPU >	stripdata_

Detailed Description

Definition at line 40 of file SiStripRawToClusterGPUKernel.h.

Member Typedef Documentation

◆ ConditionsDeviceView

using stripgpu::SiStripRawToClusterGPUKernel::ConditionsDeviceView = SiStripClusterizerConditionsGPU::Data::DeviceView

private

Definition at line 51 of file SiStripRawToClusterGPUKernel.h.

Constructor & Destructor Documentation

◆ SiStripRawToClusterGPUKernel()

stripgpu::SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel ( const edm::ParameterSet & conf )

Definition at line 20 of file SiStripRawToClusterGPUKernel.cc.

References fedRawDataOffsets_, and sistrip::NUMBER_OF_FEDS.

       : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),  // every FED slot starts out as "no data"
         // Clusterizer thresholds are read as double and stored in float members.
         channelThreshold_(conf.getParameter<double>("ChannelThreshold")),
         seedThreshold_(conf.getParameter<double>("SeedThreshold")),
         // The configured cluster threshold is stored already squared.
         clusterThresholdSquared_(std::pow(conf.getParameter<double>("ClusterThreshold"), 2.0f)),
         // NOTE(review): the three values below are read as unsigned but stored in uint8_t members,
         // so a configured value above 255 would be truncated -- confirm the allowed configuration range.
         maxSequentialHoles_(conf.getParameter<unsigned>("MaxSequentialHoles")),
         maxSequentialBad_(conf.getParameter<unsigned>("MaxSequentialBad")),
         maxAdjacentBad_(conf.getParameter<unsigned>("MaxAdjacentBad")),
         maxClusterSize_(conf.getParameter<unsigned>("MaxClusterSize")),
         // Minimum good charge is derived from the same parameter set by clusterChargeCut().
         minGoodCharge_(clusterChargeCut(conf)) {
     // Pre-size the per-FED offset list for up to NUMBER_OF_FEDS entries; makeAsync() fills it with push_back().
     fedRawDataOffsets_.reserve(sistrip::NUMBER_OF_FEDS);
   }

Member Function Documentation

◆ allocateSSTDataGPU()

void stripgpu::SiStripRawToClusterGPUKernel::allocateSSTDataGPU	(	int	max_strips,
		cudaStream_t	stream
	)

private

Referenced by makeAsync().

◆ copyAsync()

void stripgpu::SiStripRawToClusterGPUKernel::copyAsync ( cudaStream_t stream )

◆ findClusterGPU()

void stripgpu::SiStripRawToClusterGPUKernel::findClusterGPU	(	const ConditionsDeviceView *	conditions,
		cudaStream_t	stream
	)

private

Referenced by makeAsync().

◆ freeSSTDataGPU()

void stripgpu::SiStripRawToClusterGPUKernel::freeSSTDataGPU ( cudaStream_t stream )

private

◆ getResults()

SiStripClustersCUDADevice stripgpu::SiStripRawToClusterGPUKernel::getResults ( cudaStream_t stream )

Definition at line 175 of file SiStripRawToClusterGPUKernel.cc.

References clusters_d_, std::move(), and reset().

Referenced by SiStripClusterizerFromRawGPU::produce().

                                                                                           {
     // Hands the cluster collection built by makeAsync() over to the caller.
     // The stream argument is not used in this body.

     // Release the per-event helper objects (chanlocsGPU_ and sst_data_d_) first.
     reset();
 
     // clusters_d_ is moved out, so it stays in a moved-from state until makeAsync() assigns it again.
     return std::move(clusters_d_);
   }

◆ makeAsync()

void stripgpu::SiStripRawToClusterGPUKernel::makeAsync	(	const std::vector< const FEDRawData *> &	rawdata,
		const std::vector< std::unique_ptr< sistrip::FEDBuffer >> &	buffers,
		const SiStripClusterizerConditionsGPU &	conditions,
		cudaStream_t	stream
	)

Definition at line 33 of file SiStripRawToClusterGPUKernel.cc.

References allocateSSTDataGPU(), assert(), buffer, chanlocsGPU_, clusters_d_, conditions, constexpr, cms::cuda::copyAsync(), cudaCheck, data, cms::Exception, fedId, stripgpu::fedIndex(), fedIndex_, fedRawDataOffsets_, findClusterGPU(), i, stripgpu::invalidDet, stripgpu::invalidFed, k, stripgpu::kMaxSeedStrips, LogDebug, maxClusterSize_, mode, std::move(), sistrip::NUMBER_OF_FEDS, offset, sistrip::READOUT_MODE_INVALID, sistrip::READOUT_MODE_ZERO_SUPPRESSED, sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10, setSeedStripsNCIndexGPU(), sst_data_d_, stream, stripdata_, stripgpu::stripIndex(), and unpackChannelsGPU().

Referenced by SiStripClusterizerFromRawGPU::acquire().

                                                                     {
     // Stages the raw FED payloads on the device, records where each detector channel's data
     // lives, and then runs the unpacking, seed-strip and cluster-finding steps on `stream`.
     // No explicit stream synchronization is issued here unless GPU_CHECK or EDM_ML_DEBUG is defined.
     //
     // rawdata / buffers : indexed by the same FED position; entries with a null buffer are skipped
     // conditions        : provides the detector-to-FED map and the device-side conditions view
     // Throws cms::Exception if the FEDs disagree on readout mode or the mode is not
     // zero-suppressed / zero-suppressed-lite10.

     // Total staging size: sum of the buffer sizes of all FEDs that have a buffer.
     size_t totalSize{0};
     for (const auto& buff : buffers) {
       if (buff != nullptr) {
         totalSize += buff->bufferSize();
       }
     }
 
     // One contiguous host staging area and a device buffer of the same size.
     auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize, stream);
     auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize, stream);
 
     // Start from a clean per-event state: no offsets recorded, every FED slot invalid.
     size_t off = 0;
     fedRawDataOffsets_.clear();
     fedIndex_.clear();
     fedIndex_.resize(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed);
 
     sistrip::FEDReadoutMode mode = sistrip::READOUT_MODE_INVALID;
 
     // Pack the raw data of every available FED back to back into the host staging area.
     for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
       auto& buff = buffers[fedi];
       if (buff != nullptr) {
         const auto raw = rawdata[fedi];
         // NOTE(review): the staging area was sized from buff->bufferSize() but raw->size() bytes
         // are copied here; this assumes the two agree for every FED -- confirm.
         memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
         // fedIndex_ maps the FED to its position in fedRawDataOffsets_.
         fedIndex_[stripgpu::fedIndex(fedi)] = fedRawDataOffsets_.size();
         fedRawDataOffsets_.push_back(off);
         off += raw->size();
         // The first FED with data defines the readout mode; every later one must match it.
         if (fedRawDataOffsets_.size() == 1) {
           mode = buff->readoutMode();
         } else {
           if (buff->readoutMode() != mode) {
             throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
                 << buff->readoutMode() << " != " << mode;
           }
         }
       }
     }
     // send rawdata to GPU
     cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);
 
     const auto& detmap = conditions.detToFeds();
     // Only the two zero-suppressed modes are handled. If no FED had a buffer, mode is still
     // READOUT_MODE_INVALID at this point and this check throws as well.
     if ((mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED) && (mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10)) {
       throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
     }
     // Number of leading bytes skipped in each channel: 7 for zero-suppressed, 2 otherwise.
     const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
     // Running total of channel payload bytes; its final value becomes n_strips.
     size_t offset = 0;
     auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(), stream);
     // Host-side array with, per channel, the address of that channel's data inside fedRawDataGPU.
     auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(), stream);
 
     // iterate over the detector in DetID/APVPair order
     // mapping out where the data are
     for (size_t i = 0; i < detmap.size(); ++i) {
       const auto& detp = detmap[i];
       const auto fedId = detp.fedID();
       const auto fedCh = detp.fedCh();
       const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];
 
       if (fedi != invalidFed) {
         // This FED delivered data in this event: locate the channel inside its buffer.
         const auto buffer = buffers[fedId].get();
         const auto& channel = buffer->channel(detp.fedCh());
 
         auto len = channel.length();
         auto off = channel.offset();
 
         // A channel is expected to be either empty or at least as long as its header.
         assert(len >= headerlen || len == 0);
 
         // Skip the header so that off/len describe the payload only.
         if (len >= headerlen) {
           len -= headerlen;
           off += headerlen;
         }
 
         chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
         // Device address = device base + this FED's staging offset + the position of the
         // channel data relative to the start of the FED's raw data on the host.
         inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
         offset += len;
 
       } else {
         // No data for this FED: record an empty, invalid channel location.
         chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
         inputGPU[i] = nullptr;
       }
     }
 
     const auto n_strips = offset;
 
     // Despite the _d_ suffix this view is allocated with make_host_unique.
     sst_data_d_ = cms::cuda::make_host_unique<StripDataView>(stream);
     sst_data_d_->nStrips = n_strips;
 
     // Build the device-side channel locations from the host-side description and address table.
     chanlocsGPU_ = std::make_unique<ChannelLocsGPU>(detmap.size(), stream);
     chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);
 
     // Storage for the unpacked strip data, sized by the total payload length.
     stripdata_ = std::make_unique<StripDataGPU>(n_strips, stream);
 
     const auto& condGPU = conditions.getGPUProductAsync(stream);
 
     unpackChannelsGPU(condGPU.deviceView(), stream);
 #ifdef GPU_CHECK
     // Debug aid: wait for the stream here so that errors from the unpacking step surface now.
     cudaCheck(cudaStreamSynchronize(stream));
 #endif
 
 #ifdef EDM_ML_DEBUG
     // Debug-only cross-check: copy the unpacked data back and compare it byte by byte
     // with the raw input on the host.
     auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips, stream);
     cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
     cudaCheck(cudaStreamSynchronize(stream));
 
     // Raw input bytes are addressed with the low three index bits flipped.
     // NOTE(review): presumably this accounts for the byte order inside 8-byte words of the FED data -- confirm.
     constexpr int xor3bits = 7;
     for (size_t i = 0; i < chanlocs->size(); ++i) {
       const auto data = chanlocs->input(i);
       const auto len = chanlocs->length(i);
 
       if (data != nullptr && len > 0) {
         auto aoff = chanlocs->offset(i);
         auto choff = chanlocs->inoff(i);
         const auto end = choff + len;
 
         // Each group is two leading bytes (strip index, group length) followed by groupLength values.
         while (choff < end) {
           const auto stripIndex = data[choff++ ^ xor3bits];
           const auto groupLength = data[choff++ ^ xor3bits];
           // Step over the two leading bytes in the unpacked output as well.
           aoff += 2;
           for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
             if (data[choff ^ xor3bits] != outdata[aoff]) {
               LogDebug("SiStripRawToClusterGPUKernel")
                   << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
                   << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
             }
           }
         }
       }
     }
     outdata.reset(nullptr);
 #endif
 
     // The raw-data device buffer is released here, after the unpacking step has been issued.
     fedRawDataGPU.reset();
     // NOTE(review): n_strips is a size_t while allocateSSTDataGPU() takes an int, so the value
     // is narrowed implicitly -- confirm it always fits.
     allocateSSTDataGPU(n_strips, stream);
     setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
 
     // Fresh output collection for this event, then run the cluster finder into it.
     clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
     findClusterGPU(condGPU.deviceView(), stream);
 
     // The unpacked strip data is released once the cluster-finding step has been issued.
     stripdata_.reset();
   }

◆ reset()

void stripgpu::SiStripRawToClusterGPUKernel::reset ( void )

private

Definition at line 181 of file SiStripRawToClusterGPUKernel.cc.

References chanlocsGPU_, and sst_data_d_.

Referenced by getResults().

                                              {
     // Releases the per-event helper objects created in makeAsync(); called from getResults().
     // Both members are smart pointers, so reset() destroys whatever they currently own.
     chanlocsGPU_.reset();
     sst_data_d_.reset();
   }

◆ setSeedStripsNCIndexGPU()

void stripgpu::SiStripRawToClusterGPUKernel::setSeedStripsNCIndexGPU	(	const ConditionsDeviceView *	conditions,
		cudaStream_t	stream
	)

private

Referenced by makeAsync().

◆ unpackChannelsGPU()

void stripgpu::SiStripRawToClusterGPUKernel::unpackChannelsGPU	(	const ConditionsDeviceView *	conditions,
		cudaStream_t	stream
	)

private

Referenced by makeAsync().

Member Data Documentation

◆ chanlocsGPU_

std::unique_ptr<ChannelLocsGPU> stripgpu::SiStripRawToClusterGPUKernel::chanlocsGPU_

private

Definition at line 65 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync(), and reset().

◆ channelThreshold_

float stripgpu::SiStripRawToClusterGPUKernel::channelThreshold_

private

Definition at line 71 of file SiStripRawToClusterGPUKernel.h.

◆ clusters_d_

SiStripClustersCUDADevice stripgpu::SiStripRawToClusterGPUKernel::clusters_d_

private

Definition at line 70 of file SiStripRawToClusterGPUKernel.h.

Referenced by getResults(), and makeAsync().

◆ clusterThresholdSquared_

float stripgpu::SiStripRawToClusterGPUKernel::clusterThresholdSquared_

private

Definition at line 71 of file SiStripRawToClusterGPUKernel.h.

◆ fedIndex_

std::vector<stripgpu::fedId_t> stripgpu::SiStripRawToClusterGPUKernel::fedIndex_

private

Definition at line 61 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync().

◆ fedRawDataOffsets_

std::vector<size_t> stripgpu::SiStripRawToClusterGPUKernel::fedRawDataOffsets_

private

Definition at line 62 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync(), and SiStripRawToClusterGPUKernel().

◆ maxAdjacentBad_

uint8_t stripgpu::SiStripRawToClusterGPUKernel::maxAdjacentBad_

private

Definition at line 72 of file SiStripRawToClusterGPUKernel.h.

◆ maxClusterSize_

uint32_t stripgpu::SiStripRawToClusterGPUKernel::maxClusterSize_

private

Definition at line 73 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync().

◆ maxSequentialBad_

uint8_t stripgpu::SiStripRawToClusterGPUKernel::maxSequentialBad_

private

Definition at line 72 of file SiStripRawToClusterGPUKernel.h.

◆ maxSequentialHoles_

uint8_t stripgpu::SiStripRawToClusterGPUKernel::maxSequentialHoles_

private

Definition at line 72 of file SiStripRawToClusterGPUKernel.h.

◆ minGoodCharge_

float stripgpu::SiStripRawToClusterGPUKernel::minGoodCharge_

private

Definition at line 74 of file SiStripRawToClusterGPUKernel.h.

◆ pt_sst_data_d_

cms::cuda::device::unique_ptr<StripDataView> stripgpu::SiStripRawToClusterGPUKernel::pt_sst_data_d_

private

Definition at line 68 of file SiStripRawToClusterGPUKernel.h.

◆ seedThreshold_

float stripgpu::SiStripRawToClusterGPUKernel::seedThreshold_

private

Definition at line 71 of file SiStripRawToClusterGPUKernel.h.

◆ sst_data_d_

cms::cuda::host::unique_ptr<StripDataView> stripgpu::SiStripRawToClusterGPUKernel::sst_data_d_

private

Definition at line 67 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync(), and reset().

◆ stripdata_

std::unique_ptr<StripDataGPU> stripgpu::SiStripRawToClusterGPUKernel::stripdata_

private

Definition at line 64 of file SiStripRawToClusterGPUKernel.h.

Referenced by makeAsync().

Public Member Functions

Private Types

Private Member Functions

Private Attributes

Detailed Description

Member Typedef Documentation

◆ ConditionsDeviceView

Constructor & Destructor Documentation

◆ SiStripRawToClusterGPUKernel()

Member Function Documentation

◆ allocateSSTDataGPU()

◆ copyAsync()

◆ findClusterGPU()

◆ freeSSTDataGPU()

◆ getResults()

◆ makeAsync()

◆ reset()

◆ setSeedStripsNCIndexGPU()

◆ unpackChannelsGPU()

Member Data Documentation

◆ chanlocsGPU_

◆ channelThreshold_

◆ clusters_d_

◆ clusterThresholdSquared_

◆ fedIndex_

◆ fedRawDataOffsets_

◆ maxAdjacentBad_

◆ maxClusterSize_

◆ maxSequentialBad_

◆ maxSequentialHoles_

◆ minGoodCharge_

◆ pt_sst_data_d_

◆ seedThreshold_

◆ sst_data_d_

◆ stripdata_