CMS 3D CMS Logo

Namespaces | Functions | Variables
gpuClustering Namespace Reference

Namespaces

 pixelStatus
 

Functions

template<typename TrackerTraits >
 __attribute__ ((always_inline)) void clusterChargeCut(SiPixelClusterThresholds clusterThresholds
 
 assert (TrackerTraits::numberOfModules< maxNumModules)
 
 for (auto module=firstModule;module< endModule;module+=gridDim.x)
 
constexpr uint32_t maxHitsInIter ()
 

Variables

uint16_t *__restrict__ uint16_t const *__restrict__ adc
 
__shared__ int32_t charge [maxNumClustersPerModules]
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ clusterId
 
constexpr uint16_t clusterThresholdLayerOne = 2000
 
constexpr uint16_t clusterThresholdOtherLayers = 4000
 
auto endModule = moduleStart[0]
 
auto firstModule = blockIdx.x
 
uint16_t *__restrict__ id
 
constexpr int invalidClusterId = -9999
 
constexpr uint16_t invalidModuleId = std::numeric_limits<uint16_t>::max() - 1
 
constexpr uint32_t maxNumDigis = 3 * 256 * 1024
 
constexpr uint16_t maxNumModules = 4000
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
 
__shared__ int msize
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ nClustersInModule
 
__shared__ uint16_t newclusId [maxNumClustersPerModules]
 
constexpr int nMaxModules = TrackerTraits::numberOfModules
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
 
__shared__ uint8_t ok [maxNumClustersPerModules]
 
constexpr uint32_t pixelSizeX = 160
 
constexpr uint32_t pixelSizeY = 416
 
constexpr const uint32_t pixelStatusSize = isPhase2 ? 1 : pixelStatus::size
 
constexpr int startBPIX2 = TrackerTraits::layerStart[1]
 
__shared__ uint32_t status [pixelStatusSize]
 
uint16_t *__restrict__ uint16_t const *__restrict__ x
 
uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ y
 

Function Documentation

◆ __attribute__()

template<typename TrackerTraits >
gpuClustering::__attribute__ ( (always_inline)  )
inline

◆ assert()

gpuClustering::assert ( )

Referenced by for().

◆ for()

gpuClustering::for ( )

Definition at line 39 of file gpuClusterChargeCut.h.

References cms::cudacompat::__syncthreads(), cms::cudacompat::__syncthreads_and(), adc, assert(), cms::cudacompat::atomicAdd(), cms::cudacompat::blockDim, cms::cudacompat::blockIdx, cms::alpakatools::blockPrefixScan(), charge, DMR_cfg::chargeCut, clusterId, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), dqmdumpme::first, SiPixelClusterThresholds::getThresholdForLayerOnCondition(), caHitNtupletGeneratorKernels::good, mps_fire::i, invalidModuleId, pixelClustering::maxNumClustersPerModules, callgraph::module, moduleId, moduleStart, nClustersInModule, newclusId, phase1PixelTopology::numberOfModules, numElements, hltrates_dqm_sourceclient-live_cfg::offset, ok, startBPIX2, cms::cudacompat::threadIdx, and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

39  {
40  auto firstPixel = moduleStart[1 + module];
41  auto thisModuleId = id[firstPixel];
42  while (thisModuleId == invalidModuleId and firstPixel < numElements) {
43  // skip invalid or duplicate pixels
44  ++firstPixel;
45  thisModuleId = id[firstPixel];
46  }
47  if (firstPixel >= numElements) {
48  // reached the end of the input while skipping the invalid pixels, nothing left to do
49  break;
50  }
51  if (thisModuleId != moduleId[module]) {
52  // reached the end of the module while skipping the invalid pixels, skip this module
53  continue;
54  }
55  assert(thisModuleId < TrackerTraits::numberOfModules);
56 
57  auto nclus = nClustersInModule[thisModuleId];
58  if (nclus == 0)
59  continue;
60 
61  if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
62  printf("Warning too many clusters in module %d in block %d: %d > %d\n",
63  thisModuleId,
64  blockIdx.x,
65  nclus,
67 
68  auto first = firstPixel + threadIdx.x;
69 
70  if (nclus > maxNumClustersPerModules) {
71  // remove excess FIXME find a way to cut charge first....
72  for (auto i = first; i < numElements; i += blockDim.x) {
73  if (id[i] == invalidModuleId)
74  continue; // not valid
75  if (id[i] != thisModuleId)
76  break; // end of module
78  id[i] = invalidModuleId;
80  }
81  }
83  }
84 
85 #ifdef GPU_DEBUG
86  if (thisModuleId % 100 == 1)
87  if (threadIdx.x == 0)
88  printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
89 #endif
90 
92  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
93  charge[i] = 0;
94  }
95  __syncthreads();
96 
97  for (auto i = first; i < numElements; i += blockDim.x) {
98  if (id[i] == invalidModuleId)
99  continue; // not valid
100  if (id[i] != thisModuleId)
101  break; // end of module
103  }
104  __syncthreads();
105 
106  auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
107 
108  bool good = true;
109  for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
110  newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
111  if (0 == ok[i])
112  good = false;
113  }
114 
115  // if all clusters above threshold do nothing
116  if (__syncthreads_and(good))
117  continue;
118 
119  // renumber
120  __shared__ uint16_t ws[32];
121  constexpr auto maxThreads = 1024;
122  auto minClust = nclus > maxThreads ? maxThreads : nclus;
123 
125  if constexpr (maxNumClustersPerModules > maxThreads) //only if needed
126  {
127  for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
129  for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) {
130  uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1;
131  newclusId[i] += newclusId[prevBlockEnd];
132  }
133  __syncthreads();
134  }
135  }
136  assert(nclus > newclusId[nclus - 1]);
137 
138  nClustersInModule[thisModuleId] = newclusId[nclus - 1];
139 
140  // reassign id
141  for (auto i = first; i < numElements; i += blockDim.x) {
142  if (id[i] == invalidModuleId)
143  continue; // not valid
144  if (id[i] != thisModuleId)
145  break; // end of module
146  if (0 == ok[clusterId[i]])
147  clusterId[i] = id[i] = invalidModuleId;
148  else
149  clusterId[i] = newclusId[clusterId[i]] - 1;
150  }
151 
152  // done
153  __syncthreads();
154  } // loop on modules
const dim3 threadIdx
Definition: cudaCompat.h:29
__shared__ uint8_t ok[maxNumClustersPerModules]
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ nClustersInModule
__shared__ int32_t charge[maxNumClustersPerModules]
const dim3 blockDim
Definition: cudaCompat.h:30
chargeCut
Definition: DMR_cfg.py:159
constexpr uint16_t numberOfModules
assert(TrackerTraits::numberOfModules< maxNumModules)
constexpr int startBPIX2
const dim3 blockIdx
Definition: cudaCompat.h:32
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ clusterId
constexpr uint16_t invalidModuleId
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ int32_t *__restrict__ uint32_t numElements
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ moduleStart
__shared__ uint16_t newclusId[maxNumClustersPerModules]
void __syncthreads()
Definition: cudaCompat.h:132
constexpr int32_t maxNumClustersPerModules
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan(const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
Definition: prefixScan.h:47
uint16_t *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t const *__restrict__ moduleId
bool __syncthreads_and(bool x)
Definition: cudaCompat.h:135
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61
uint16_t *__restrict__ uint16_t const *__restrict__ adc

◆ maxHitsInIter()

constexpr uint32_t gpuClustering::maxHitsInIter ( )

Definition at line 14 of file gpuClusteringConstants.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::pixelRecHits::GetHits< TrackerTraits >::operator()().

14 { return 160; } //TODO better tuning for PU 140-200

Variable Documentation

◆ adc

uint16_t* __restrict__ uint16_t const* __restrict__ gpuClustering::adc

Definition at line 19 of file gpuClusterChargeCut.h.

Referenced by QIE10Task::_process(), QIE11Task::_process(), PedestalTask::_process(), LEDTask::_process(), DigiTask::_process(), LaserTask::_process(), CastorCoderDb::adc2fC_(), HcalCoderDb::adc2fC_(), CastorTPGCoder::adc2Linear(), HcalTPGCoder::adc2Linear(), HcaluLUTTPGCoder::adc2Linear(), hcal::adc_for_sample(), SiPixelCluster::add(), SiPixelArrayBuffer::add_adc(), ThreeThresholdAlgorithm::addToCandidate(), ESElectronicsSimFast::analogToDigital(), CSCComparatorDigiValidation::analyze(), ZDCQIE10Task::analyze(), ESTimingTask::analyze(), FCDTask::analyze(), SiStripBaselineValidator::analyze(), ZDCTask::analyze(), EcalPreshowerNoiseDistrib::analyze(), CastorDigiStatistics::analyze(), SiStripMonitorDigi::analyze(), HcalDigiStatistics::analyze(), CTPPSPixelDQMSource::analyze(), BtlDigiHitsValidation::analyze(), HGCalDigiValidation::analyze(), EcalSimRawData::analyze(), EcalLaserAnalyzerYousi::analyze(), HGCalTBAnalyzer::analyze(), RecAnalyzerMinbias::analyze(), EcnaAnalyzer::analyze(), CMTRawAnalyzer::analyze(), HGCalTBAnalyzer::analyzeDigi(), SiStripClusterToDigiProducer::applyGain(), ThreeThresholdAlgorithm::applyGains(), CSCFindPeakTime::averageTime(), EcalClusterLazyToolsBase::BasicClusterTime(), FWSiPixelClusterProxyBuilder::build(), RPixDetClusterizer::buildClusters(), DCCTBDataMapper::buildXtalFields(), gpuCalibPixel::calibDigis(), gpuCalibPixel::calibDigisPhase2(), RPixDetClusterizer::calibrate(), PixelThresholdClusterizer::calibrate(), SiStripAPVRestorer::cleaner_LocalMinimumAdder(), hcal::reconstruction::compute_coder_charge(), hcal::reconstruction::compute_diff_charge_gain(), SiTrivialDigitalConverter::convert(), RPixDummyROCSimulator::ConvertChargeToHits(), convertHB(), SiTrivialDigitalConverter::convertRaw(), PixelThresholdClusterizer::copy_to_buffer(), CastorSimpleRecAlgoImpl::corrSaturation(), HGCalDigiValidation::digiValidation(), PulseFitWithShape::doFit(), TPNFit::doFit(), PulseFitWithFunction::doFit(), ESElectronicsSim::encode(), EcalLiteDTUCoder::encode(), EcalCoder::encode(), ESRecHitAnalyticAlgo::EvalAmplitude(), ESRecHitFitAlgo::EvalAmplitude(), ESRecHitSimAlgo::evalAmplitude(), PedestalsTask::fill(), DaqScopeModeTask::fill(), PedsFullNoiseTask::fill(), SiPixelDigiModule::fill(), sistrip::FEDBufferPayloadCreator::fillClusterData(), sistrip::FEDBufferPayloadCreator::fillClusterDataPreMixMode(), CMTRawAnalyzer::fillDigiErrorsHFQIE10(), CMTRawAnalyzer::fillDigiErrorsQIE11(), reco::HcalNoiseInfoProducer::filldigis(), Phase2TrackerMonitorDigi::fillITPixelDigiHistos(), SiPixelCalibDigiProducer::fillPixel(), QcdLowPtDQM::fillPixels(), HLTHcalLaserMisfireFilter::filter(), CSCXonStrip_MatchGatti::findXOnStrip(), JetCoreClusterSplitter::fittingSplit(), CSCFindPeakTime::fivePoleFitCharge(), CSCFindPeakTime::fivePoleFitTime(), SiStripAPVRestorer::flatRegionsFinder(), for(), TSFit::fpol3dg(), generate(), EcalSimRawData::genFeData(), EcalSimRawData::getEbDigi(), HcalQIEManager::getHfQieTable(), HcalLutManager::getLinearizationLutXmlFromAsciiMasterEmap(), HcaluLUTTPGCoder::getMSB(), CSCMake2DRecHit::hitFromStripAndWire(), SiStripAPVRestorer::hybridFormatInspect(), CTPPSPixelDataFormatter::interpretRawData(), PixelDataFormatter::interpretRawData(), SiStripFedZeroSuppression::isAValidDigi(), CastorSimpleRecAlgoImpl::isSaturated(), HcalZSAlgoRealistic::keepMe(), SiPixelDigitizerAlgorithm::lateSignalReweight(), CastorLedAnalysis::LedCastorHists(), HcalLedAnalysis::LedHBHEHists(), HcalLedAnalysis::LedHFHists(), HcalLedAnalysis::LedHOHists(), OptoScanTask::locateTicks(), HcaluLUTTPGCoder::lookupMSB(), SiPixelDigitizerAlgorithm::make_digis(), CSCHitFromStripOnly::makeStripData(), ShallowClustersProducer::NearDigis::NearDigis(), ESRecHitSimAlgo::oldEvalAmplitude(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::raw::Kernel_unpack::operator()(), pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_1d_and_initialize::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_computation_init::operator()(), pixelgpudetails::pack(), pixelDetails::pack(), UHTRpacker::packQIE8sample(), CSCFindPeakTime::parabolaFitTime(), HcalTriggerPrimitiveAlgo::passTDC(), CSCFindPeakTime::peakTime(), PrintRecoObjects::print(), MatacqTBDataFormatter::printData(), MatacqDataFormatter::printData(), EcalFenixStripFgvbEE::process(), EcalFenixStrip::process_part1(), HBHEPhase1Reconstructor::processData(), SiPixelDigisClustersFromSoAAlpaka< TrackerTraits >::produce(), PreMixingSiPixelWorker::put(), PGlobalDigi::putBRL1Digis(), PGlobalDigi::putBRL2Digis(), PGlobalDigi::putBRL3Digis(), PGlobalDigi::putCSCstripDigis(), edm::DataMixingEMDigiWorker::putEM(), PGlobalDigi::putFWD1nDigis(), PGlobalDigi::putFWD1pDigis(), PGlobalDigi::putFWD2nDigis(), PGlobalDigi::putFWD2pDigis(), edm::DataMixingSiPixelWorker::putSiPixel(), PGlobalDigi::putTECW1Digis(), PGlobalDigi::putTECW2Digis(), PGlobalDigi::putTECW3Digis(), PGlobalDigi::putTECW4Digis(), PGlobalDigi::putTECW5Digis(), PGlobalDigi::putTECW6Digis(), PGlobalDigi::putTECW7Digis(), PGlobalDigi::putTECW8Digis(), PGlobalDigi::putTIBL1Digis(), PGlobalDigi::putTIBL2Digis(), PGlobalDigi::putTIBL3Digis(), PGlobalDigi::putTIBL4Digis(), PGlobalDigi::putTIDW1Digis(), PGlobalDigi::putTIDW2Digis(), PGlobalDigi::putTIDW3Digis(), PGlobalDigi::putTOBL1Digis(), PGlobalDigi::putTOBL2Digis(), PGlobalDigi::putTOBL3Digis(), PGlobalDigi::putTOBL4Digis(), TMatacq::rawPulseAnalysis(), EcalPedOffset::readDACs(), EcalPedHists::readEBdigis(), EcalPedHists::readEEdigis(), HcalDigisValidation::reco(), ecaldqm::PresampleTask::runOnDigis(), ecaldqm::LedTask::runOnDigis(), ecaldqm::LaserTask::runOnDigis(), HGCFEElectronics< DFr >::runShaperWithToT(), HGCFEElectronics< DFr >::runSimpleShaper(), BTLElectronicsSim::runTrivialShaper(), ETLElectronicsSim::runTrivialShaper(), HGCFEElectronics< DFr >::runTrivialShaper(), HcalSignalGenerator< HBHEDigitizerTraits >::samplesInPE(), HGCalConcentratorAutoEncoderImpl::select(), SiPixelArrayBuffer::set_adc(), DCUCapsuleTempRawDat::setCapsuleTempADC(), CastorQIEShape::setLowEdges(), HcalQIEShape::setLowEdges(), TPNPulse::setPulse(), TAPDPulse::setPulse(), QIE10DataFrame::setSample(), QIE11DataFrame::setSample(), SiPixelCluster::SiPixelCluster(), EBDataFrame::spikeEstimator(), ThreeThresholdAlgorithm::stripByStripAdd(), SiStripApvShotCleaner::subtractCM(), hcaldqm::utilities::sumQ_v10(), SiStripFedZeroSuppression::suppress(), HcalTDC::timing(), SiTrivialDigitalConverter::truncate(), SiTrivialDigitalConverter::truncateRaw(), HcalFiberPattern::unpack(), HcaluLUTTPGCoder::update(), HcalSignalGenerator< HBHEDigitizerTraits >::validDigi(), EcalSignalGenerator< EEDigitizerTraits >::validDigi(), and ESUnpacker::word2digi().

◆ charge

__shared__ int32_t gpuClustering::charge[maxNumClustersPerModules]

Definition at line 28 of file gpuClusterChargeCut.h.

Referenced by for().

◆ clusterId

uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t *__restrict__ int32_t *__restrict__ gpuClustering::clusterId

◆ clusterThresholdLayerOne

constexpr uint16_t gpuClustering::clusterThresholdLayerOne = 2000

◆ clusterThresholdOtherLayers

constexpr uint16_t gpuClustering::clusterThresholdOtherLayers = 4000

◆ endModule

auto gpuClustering::endModule = moduleStart[0]

◆ firstModule

auto gpuClustering::firstModule = blockIdx.x

Definition at line 37 of file gpuClusterChargeCut.h.

◆ id

uint16_t *__restrict__ gpuClustering::id

Definition at line 19 of file gpuClusterChargeCut.h.

◆ invalidClusterId

constexpr int gpuClustering::invalidClusterId = -9999

◆ invalidModuleId

constexpr uint16_t gpuClustering::invalidModuleId = std::numeric_limits<uint16_t>::max() - 1

◆ maxNumDigis

constexpr uint32_t gpuClustering::maxNumDigis = 3 * 256 * 1024

Definition at line 20 of file gpuClusteringConstants.h.

Referenced by SiPixelPhase2DigiToClusterCUDA::acquire().

◆ maxNumModules

constexpr uint16_t gpuClustering::maxNumModules = 4000

◆ moduleId

uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t *__restrict__ gpuClustering::moduleId

◆ moduleStart

uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ gpuClustering::moduleStart

◆ msize

__shared__ int gpuClustering::msize

◆ nClustersInModule

uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ gpuClustering::nClustersInModule

◆ newclusId

__shared__ uint16_t gpuClustering::newclusId[maxNumClustersPerModules]

◆ nMaxModules

constexpr int gpuClustering::nMaxModules = TrackerTraits::numberOfModules

◆ numElements

uint16_t *__restrict__ uint16_t const *__restrict__ uint16_t const *__restrict__ uint32_t const *__restrict__ uint32_t *__restrict__ uint32_t *__restrict__ int32_t *__restrict__ int gpuClustering::numElements

◆ ok

__shared__ uint8_t gpuClustering::ok[maxNumClustersPerModules]

Definition at line 29 of file gpuClusterChargeCut.h.

Referenced by for().

◆ pixelSizeX

constexpr uint32_t gpuClustering::pixelSizeX = 160

Definition at line 18 of file gpuClustering.h.

Referenced by gpuClustering::pixelStatus::getIndex().

◆ pixelSizeY

constexpr uint32_t gpuClustering::pixelSizeY = 416

Definition at line 19 of file gpuClustering.h.

◆ pixelStatusSize

constexpr const uint32_t gpuClustering::pixelStatusSize = isPhase2 ? 1 : pixelStatus::size

Definition at line 108 of file gpuClustering.h.

◆ startBPIX2

constexpr int gpuClustering::startBPIX2 = TrackerTraits::layerStart[1]

◆ status

__shared__ uint32_t gpuClustering::status[pixelStatusSize]

◆ x

uint16_t* __restrict__ uint16_t const* __restrict__ gpuClustering::x

Definition at line 97 of file gpuClustering.h.

◆ y

uint16_t* __restrict__ uint16_t const* __restrict__ uint16_t const* __restrict__ gpuClustering::y

Definition at line 97 of file gpuClustering.h.