d5/d31/SiStripRawToClusterGPUKernel_8cc_source.html

 #include "EventFilter/SiStripRawToDigi/interface/SiStripFEDBuffer.h"
 #include "DataFormats/Common/interface/DetSetVectorNew.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
 #include "RecoLocalTracker/SiStripClusterizer/interface/ClusterChargeCut.h"

 #include "SiStripRawToClusterGPUKernel.h"

 #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
 #include "ChannelLocsGPU.h"
 #include "StripDataView.h"

 namespace stripgpu {
   // Allocate the three device arrays owned by StripDataGPU, each holding `size` elements
   // (uint8_t data, uint16_t channel and stripId_t strip id), through the cms::cuda
   // device allocator on `stream`.
   StripDataGPU::StripDataGPU(size_t size, cudaStream_t stream)
       : alldataGPU_(cms::cuda::make_device_unique<uint8_t[]>(size, stream)),
         channelGPU_(cms::cuda::make_device_unique<uint16_t[]>(size, stream)),
         stripIdGPU_(cms::cuda::make_device_unique<stripgpu::stripId_t[]>(size, stream)) {}

   SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel(const edm::ParameterSet& conf)
       : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),
         channelThreshold_(conf.getParameter<double>("ChannelThreshold")),
         seedThreshold_(conf.getParameter<double>("SeedThreshold")),
         clusterThresholdSquared_(std::pow(conf.getParameter<double>("ClusterThreshold"), 2.0f)),
         maxSequentialHoles_(conf.getParameter<unsigned>("MaxSequentialHoles")),
         maxSequentialBad_(conf.getParameter<unsigned>("MaxSequentialBad")),
         maxAdjacentBad_(conf.getParameter<unsigned>("MaxAdjacentBad")),
         maxClusterSize_(conf.getParameter<unsigned>("MaxClusterSize")),
         minGoodCharge_(clusterChargeCut(conf)) {
     fedRawDataOffsets_.reserve(sistrip::NUMBER_OF_FEDS);
   }

   // Copy the raw FED data to the device and enqueue unpacking and clustering on `stream`.
   //
   // Parameters:
   //   rawdata    - raw data blocks, indexed like `buffers`; only entries whose matching
   //                buffer is non-null are dereferenced
   //   buffers    - parsed FED buffers; a null entry means "no data for this FED"
   //   conditions - provides the detector-to-FED map and the device-side conditions
   //   stream     - CUDA stream used for all allocations, copies and kernel launches
   //
   // Throws cms::Exception if the non-null buffers do not all share the same readout mode,
   // or if that mode is neither READOUT_MODE_ZERO_SUPPRESSED nor
   // READOUT_MODE_ZERO_SUPPRESSED_LITE10. With no non-null buffer the mode stays
   // READOUT_MODE_INVALID, so the "unsupported readout mode" exception is thrown as well.
   //
   // Unless GPU_CHECK or EDM_ML_DEBUG is defined, this function does not synchronize the
   // stream itself; the clusters are handed out later by getResults().
   void SiStripRawToClusterGPUKernel::makeAsync(const std::vector<const FEDRawData*>& rawdata,
                                                const std::vector<std::unique_ptr<sistrip::FEDBuffer>>& buffers,
                                                const SiStripClusterizerConditionsGPU& conditions,
                                                cudaStream_t stream) {
     // Size the staging buffers from the very same quantity that is copied below
     // (raw->size()), so the memcpy in the packing loop can never run past the allocation.
     size_t totalSize{0};
     for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
       if (buffers[fedi] != nullptr) {
         totalSize += rawdata[fedi]->size();
       }
     }

     auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize, stream);
     auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize, stream);

     // Reset the per-event bookkeeping: fedRawDataOffsets_[k] is the byte offset of the k-th
     // packed FED, fedIndex_[fedIndex(fed)] is that k (or invalidFed when the FED has no data).
     size_t off = 0;
     fedRawDataOffsets_.clear();
     fedIndex_.clear();
     fedIndex_.resize(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed);

     sistrip::FEDReadoutMode mode = sistrip::READOUT_MODE_INVALID;

     // Pack the raw data of every available FED back to back into the host staging buffer
     for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
       auto& buff = buffers[fedi];
       if (buff != nullptr) {
         const auto raw = rawdata[fedi];
         memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
         fedIndex_[stripgpu::fedIndex(fedi)] = fedRawDataOffsets_.size();
         fedRawDataOffsets_.push_back(off);
         off += raw->size();
         // the first FED defines the readout mode, every later one has to agree with it
         if (fedRawDataOffsets_.size() == 1) {
           mode = buff->readoutMode();
         } else {
           if (buff->readoutMode() != mode) {
             throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
                 << buff->readoutMode() << " != " << mode;
           }
         }
       }
     }
     // send rawdata to GPU
     cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);

     const auto& detmap = conditions.detToFeds();
     if ((mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED) && (mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10)) {
       throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
     }
     // number of leading bytes skipped in each channel: 7 for ZERO_SUPPRESSED, 2 for ZERO_SUPPRESSED_LITE10
     const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
     // running total of payload bytes over all channels; becomes n_strips below
     size_t offset = 0;
     auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(), stream);
     // host-side table of per-channel *device* addresses; handed to chanlocsGPU_->setVals() below
     auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(), stream);

     // iterate over the detector in DetID/APVPair order
     // mapping out where the data are
     for (size_t i = 0; i < detmap.size(); ++i) {
       const auto& detp = detmap[i];
       const auto fedId = detp.fedID();
       const auto fedCh = detp.fedCh();
       const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];

       if (fedi != invalidFed) {
         const auto buffer = buffers[fedId].get();
         const auto& channel = buffer->channel(fedCh);

         auto len = channel.length();
         auto off = channel.offset();

         assert(len >= headerlen || len == 0);

         // strip the per-channel header so that (off, len) describe the payload only
         if (len >= headerlen) {
           len -= headerlen;
           off += headerlen;
         }

         chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
         // same position inside the device copy: start of this FED on the device plus the
         // distance of the channel data from the start of the FED's raw data on the host
         inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
         offset += len;

       } else {
         // no data for this FED: record an empty, invalid channel
         chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
         inputGPU[i] = nullptr;
       }
     }

     const auto n_strips = offset;

     sst_data_d_ = cms::cuda::make_host_unique<StripDataView>(stream);
     sst_data_d_->nStrips = n_strips;

     chanlocsGPU_ = std::make_unique<ChannelLocsGPU>(detmap.size(), stream);
     chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);

     stripdata_ = std::make_unique<StripDataGPU>(n_strips, stream);

     const auto& condGPU = conditions.getGPUProductAsync(stream);

     unpackChannelsGPU(condGPU.deviceView(), stream);
 #ifdef GPU_CHECK
     cudaCheck(cudaStreamSynchronize(stream));
 #endif

 #ifdef EDM_ML_DEBUG
     // Debug cross-check: copy the unpacked bytes back and compare them with the host-side
     // channel data, logging every mismatch.
     auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips, stream);
     cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
     cudaCheck(cudaStreamSynchronize(stream));

     // host bytes are addressed with (index ^ 7)
     // NOTE(review): presumably this undoes a byte swap within 8-byte words - confirm
     constexpr int xor3bits = 7;
     for (size_t i = 0; i < chanlocs->size(); ++i) {
       const auto data = chanlocs->input(i);
       const auto len = chanlocs->length(i);

       if (data != nullptr && len > 0) {
         auto aoff = chanlocs->offset(i);
         auto choff = chanlocs->inoff(i);
         const auto end = choff + len;

         while (choff < end) {
           // each group starts with two bytes (strip index, group length) followed by
           // groupLength data bytes; the two leading bytes are skipped in the output too
           const auto stripIndex = data[choff++ ^ xor3bits];
           const auto groupLength = data[choff++ ^ xor3bits];
           aoff += 2;
           for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
             if (data[choff ^ xor3bits] != outdata[aoff]) {
               LogDebug("SiStripRawToClusterGPUKernel")
                   << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
                   << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
             }
           }
         }
       }
     }
     outdata.reset(nullptr);
 #endif

     // the raw data are not referenced by this function after unpacking: release the handle
     fedRawDataGPU.reset();
     allocateSSTDataGPU(n_strips, stream);
     setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);

     clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
     findClusterGPU(condGPU.deviceView(), stream);

     // the unpacked strip buffers are not referenced by this function after clustering is enqueued
     stripdata_.reset();
   }

   // Hand the clusters produced by makeAsync() over to the caller.
   //
   // The per-event helper objects are dropped first via reset(); the cluster collection is
   // then moved out of this object, leaving clusters_d_ in a moved-from state.
   // `stream` is accepted for interface compatibility and is not used here.
   SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
     reset();

     SiStripClustersCUDADevice clusters(std::move(clusters_d_));
     return clusters;
   }

   // Drop the per-event channel-location table and the strip data view, in that order.
   // The cluster collection (clusters_d_) is left untouched.
   void SiStripRawToClusterGPUKernel::reset() {
     chanlocsGPU_ = nullptr;
     sst_data_d_ = nullptr;
   }
 }  // namespace stripgpu
stripgpu::invalidFed
static constexpr fedId_t invalidFed
Definition: SiStripTypes.h:15

sistrip::NUMBER_OF_FEDS
static const uint16_t NUMBER_OF_FEDS
Definition: ConstantsForHardwareSystems.h:24

stripgpu::SiStripRawToClusterGPUKernel::maxClusterSize_
uint32_t maxClusterSize_
Definition: SiStripRawToClusterGPUKernel.h:73

mps_fire.i
i
Definition: mps_fire.py:429

stripgpu::SiStripRawToClusterGPUKernel::allocateSSTDataGPU
void allocateSSTDataGPU(int max_strips, cudaStream_t stream)

stripgpu::SiStripRawToClusterGPUKernel::reset
void reset()
Definition: SiStripRawToClusterGPUKernel.cc:181

Exception
Definition: hltDiff.cc:245

stripgpu::SiStripRawToClusterGPUKernel::chanlocsGPU_
std::unique_ptr< ChannelLocsGPU > chanlocsGPU_
Definition: SiStripRawToClusterGPUKernel.h:65

DetSetVectorNew.h

ChannelLocsGPU.h

edmScanValgrind.buffer
buffer
Definition: edmScanValgrind.py:171

submitPVValidationJobs.conditions
list conditions
Definition: submitPVValidationJobs.py:679

stripgpu::SiStripRawToClusterGPUKernel::makeAsync
void makeAsync(const std::vector< const FEDRawData *> &rawdata, const std::vector< std::unique_ptr< sistrip::FEDBuffer >> &buffers, const SiStripClusterizerConditionsGPU &conditions, cudaStream_t stream)
Definition: SiStripRawToClusterGPUKernel.cc:33

std
Definition: JetResolutionObject.h:76

stripgpu::StripDataGPU::stripIdGPU_
cms::cuda::device::unique_ptr< stripgpu::stripId_t[]> stripIdGPU_
Definition: SiStripRawToClusterGPUKernel.h:35

stripgpu::SiStripRawToClusterGPUKernel::findClusterGPU
void findClusterGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)

stripgpu::StripDataGPU::StripDataGPU
StripDataGPU(size_t size, cudaStream_t stream)
Definition: SiStripRawToClusterGPUKernel.cc:14

cms::cuda::stream
uint32_t T const  *__restrict__ uint32_t const  *__restrict__ int32_t int Histo::index_type cudaStream_t stream
Definition: HistoContainer.h:51

sistrip::READOUT_MODE_ZERO_SUPPRESSED
Definition: SiStripFEDBufferComponents.h:56

ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr
if constexpr(n > 3)
Definition: BrokenLine.h:164

cms::cuda::assert
assert(be >=bs)

sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10
Definition: SiStripFEDBufferComponents.h:49

SiStripClustersCUDADevice
Definition: SiStripClustersCUDA.h:11

sistrip
sistrip classes
Definition: EnsembleCalibrationLA.cc:9

stripgpu::StripDataGPU::alldataGPU_
cms::cuda::device::unique_ptr< uint8_t[]> alldataGPU_
Definition: SiStripRawToClusterGPUKernel.h:33

ClusterChargeCut.h

stripgpu::SiStripRawToClusterGPUKernel::fedIndex_
std::vector< stripgpu::fedId_t > fedIndex_
Definition: SiStripRawToClusterGPUKernel.h:61

sistrip::READOUT_MODE_INVALID
Definition: SiStripFEDBufferComponents.h:46

ALCARECOPromptCalibProdSiPixelAli0T_cff.mode
mode
Definition: ALCARECOPromptCalibProdSiPixelAli0T_cff.py:96

sistrip::FEDReadoutMode
FEDReadoutMode
Definition: SiStripFEDBufferComponents.h:45

stripgpu::stripIndex
__host__ __device__ std::uint32_t stripIndex(fedId_t fed, fedCh_t channel, stripId_t strip)
Definition: SiStripClusterizerConditionsGPU.h:19

stripgpu::SiStripRawToClusterGPUKernel::getResults
SiStripClustersCUDADevice getResults(cudaStream_t stream)
Definition: SiStripRawToClusterGPUKernel.cc:175

stripgpu
Definition: SiStripClusterizerConditionsGPU.h:17

stripgpu::SiStripRawToClusterGPUKernel::unpackChannelsGPU
void unpackChannelsGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)

stripgpu::SiStripRawToClusterGPUKernel::sst_data_d_
cms::cuda::host::unique_ptr< StripDataView > sst_data_d_
Definition: SiStripRawToClusterGPUKernel.h:67

cms::cuda::copyAsync
void copyAsync(device::unique_ptr< T > &dst, const host::unique_ptr< T > &src, cudaStream_t stream)
Definition: copyAsync.h:20

host_unique_ptr.h

f
double f[11][100]
Definition: MuScleFitUtils.cc:78

stripgpu::SiStripClusterizerConditionsGPU
Definition: SiStripClusterizerConditionsGPU.h:31

stripgpu::SiStripRawToClusterGPUKernel::fedRawDataOffsets_
std::vector< size_t > fedRawDataOffsets_
Definition: SiStripRawToClusterGPUKernel.h:62

stripgpu::invalidDet
static constexpr detId_t invalidDet
Definition: SiStripTypes.h:14

stripgpu::kMaxSeedStrips
constexpr auto kMaxSeedStrips
Definition: StripDataView.h:12

stripgpu::StripDataGPU::channelGPU_
cms::cuda::device::unique_ptr< uint16_t[]> channelGPU_
Definition: SiStripRawToClusterGPUKernel.h:34

copyAsync.h

SiStripFEDBuffer.h

clusterChargeCut
float clusterChargeCut(const edm::ParameterSet &conf, const char *name="clusterChargeCut")
Definition: ClusterChargeCut.h:7

SiStripClusterizerConditionsGPU.h

stripgpu::SiStripRawToClusterGPUKernel::clusters_d_
SiStripClustersCUDADevice clusters_d_
Definition: SiStripRawToClusterGPUKernel.h:70

data
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

trackerHitRTTI::vector
Definition: trackerHitRTTI.h:21

l1tstage2_dqm_sourceclient-live_cfg.fedId
fedId
Definition: l1tstage2_dqm_sourceclient-live_cfg.py:88

edm::ParameterSet
Definition: ParameterSet.h:48

SiStripRawToClusterGPUKernel.h

StripDataView.h

cudaCheck
#define cudaCheck(ARG,...)
Definition: cudaCheck.h:69

stripgpu::SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel
SiStripRawToClusterGPUKernel(const edm::ParameterSet &conf)
Definition: SiStripRawToClusterGPUKernel.cc:20

stripgpu::fedIndex
__host__ __device__ fedId_t fedIndex(fedId_t fed)
Definition: SiStripClusterizerConditionsGPU.h:18

funct::pow
Power< A, B >::type pow(const A &a, const B &b)
Definition: Power.h:29

eostools.move
def move(src, dest)
Definition: eostools.py:511

hltrates_dqm_sourceclient-live_cfg.offset
offset
Definition: hltrates_dqm_sourceclient-live_cfg.py:83

stripgpu::SiStripRawToClusterGPUKernel::setSeedStripsNCIndexGPU
void setSeedStripsNCIndexGPU(const ConditionsDeviceView *conditions, cudaStream_t stream)

dqmdumpme.k
k
Definition: dqmdumpme.py:60

stripgpu::SiStripRawToClusterGPUKernel::stripdata_
std::unique_ptr< StripDataGPU > stripdata_
Definition: SiStripRawToClusterGPUKernel.h:64

LogDebug
#define LogDebug(id)
Definition: MessageLogger.h:241