CMS 3D CMS Logo

EcalUncalibRecHitMultiFitAlgoPortable.dev.cc
Go to the documentation of this file.
1 #include <iostream>
2 #include <limits>
3 #include <alpaka/alpaka.hpp>
4 
6 
10 #include "TimeComputationKernels.h"
11 
12 //#define DEBUG
13 //#define ECAL_RECO_ALPAKA_DEBUG
14 
16 
17  using namespace cms::alpakatools;
18 
19  void launchKernels(Queue& queue,
20  InputProduct const& digisDevEB,
21  InputProduct const& digisDevEE,
22  OutputProduct& uncalibRecHitsDevEB,
23  OutputProduct& uncalibRecHitsDevEE,
24  EcalMultifitConditionsDevice const& conditionsDev,
25  EcalMultifitParametersDevice const& paramsDev,
26  ConfigurationParameters const& configParams) {
27  using digis_type = std::vector<uint16_t>;
28  using dids_type = std::vector<uint32_t>;
29  // according to the cpu setup //----> hardcoded
31  // according to the cpu setup //----> hardcoded
33  auto constexpr kMaxSamples = EcalDataFrame::MAXSAMPLES;
34 
35  auto const ebSize = static_cast<uint32_t>(uncalibRecHitsDevEB.const_view().metadata().size());
36  auto const totalChannels = ebSize + static_cast<uint32_t>(uncalibRecHitsDevEE.const_view().metadata().size());
37 
38  EventDataForScratchDevice scratch(configParams, totalChannels, queue);
39 
40  //
41  // 1d preparation kernel
42  //
43  uint32_t constexpr nchannels_per_block = 32;
44  auto constexpr threads_1d = kMaxSamples * nchannels_per_block;
45  auto const blocks_1d = cms::alpakatools::divide_up_by(totalChannels * kMaxSamples, threads_1d);
46  auto workDivPrep1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_1d, threads_1d);
47  // Since the ::ecal::multifit::X objects are non-dynamic Eigen::Matrix types the returned pointers from the buffers
48  // and the ::ecal::multifit::X* both point to the data.
49  alpaka::exec<Acc1D>(queue,
50  workDivPrep1D,
52  digisDevEB.const_view(),
53  digisDevEE.const_view(),
54  uncalibRecHitsDevEB.view(),
55  uncalibRecHitsDevEE.view(),
56  conditionsDev.const_view(),
57  reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
58  reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
59  scratch.hasSwitchToGain6DevBuf.data(),
60  scratch.hasSwitchToGain1DevBuf.data(),
61  scratch.isSaturatedDevBuf.data(),
62  scratch.acStateDevBuf.data(),
63  reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
66 
67  //
68  // 2d preparation kernel
69  //
70  Vec2D const blocks_2d{1u, totalChannels}; // {y, x} coordiantes
71  Vec2D const threads_2d{kMaxSamples, kMaxSamples};
72  auto workDivPrep2D = cms::alpakatools::make_workdiv<Acc2D>(blocks_2d, threads_2d);
73  alpaka::exec<Acc2D>(queue,
74  workDivPrep2D,
76  digisDevEB.const_view(),
77  digisDevEE.const_view(),
78  conditionsDev.const_view(),
79  reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
80  reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
81  reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
82  scratch.hasSwitchToGain6DevBuf.data(),
83  scratch.hasSwitchToGain1DevBuf.data(),
84  scratch.isSaturatedDevBuf.data());
85 
86  // run minimization kernels
88  digisDevEB,
89  digisDevEE,
90  uncalibRecHitsDevEB,
91  uncalibRecHitsDevEE,
92  scratch,
93  conditionsDev,
94  configParams,
95  totalChannels);
96 
97  if (configParams.shouldRunTimingComputation) {
98  //
99  // TODO: this guy can run concurrently with other kernels,
100  // there is no dependence on the order of execution
101  //
102  auto const blocks_time_init = blocks_1d;
103  auto const threads_time_init = threads_1d;
104  auto workDivTimeCompInit1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_time_init, threads_time_init);
105  alpaka::exec<Acc1D>(queue,
106  workDivTimeCompInit1D,
108  digisDevEB.const_view(),
109  digisDevEE.const_view(),
110  conditionsDev.const_view(),
111  scratch.sample_valuesDevBuf.value().data(),
112  scratch.sample_value_errorsDevBuf.value().data(),
113  scratch.ampMaxErrorDevBuf.value().data(),
114  scratch.useless_sample_valuesDevBuf.value().data(),
115  scratch.pedestal_numsDevBuf.value().data());
116 
117  //
118  // TODO: small kernel only for EB. It needs to be checked if
120  //
121  if (ebSize > 0) {
122  // we are running only over EB digis
123  // therefore we need to create threads/blocks only for that
124  auto const threadsFixMGPA = threads_1d;
125  auto const blocksFixMGPA = cms::alpakatools::divide_up_by(kMaxSamples * ebSize, threadsFixMGPA);
126  auto workDivTimeFixMGPAslew1D = cms::alpakatools::make_workdiv<Acc1D>(blocksFixMGPA, threadsFixMGPA);
127  alpaka::exec<Acc1D>(queue,
128  workDivTimeFixMGPAslew1D,
130  digisDevEB.const_view(),
131  digisDevEE.const_view(),
132  conditionsDev.const_view(),
133  scratch.sample_valuesDevBuf.value().data(),
134  scratch.sample_value_errorsDevBuf.value().data(),
135  scratch.useless_sample_valuesDevBuf.value().data());
136  }
137 
138  auto const threads_nullhypot = threads_1d;
139  auto const blocks_nullhypot = blocks_1d;
140  auto workDivTimeNullhypot1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_nullhypot, threads_nullhypot);
141  alpaka::exec<Acc1D>(queue,
142  workDivTimeNullhypot1D,
144  scratch.sample_valuesDevBuf.value().data(),
145  scratch.sample_value_errorsDevBuf.value().data(),
146  scratch.useless_sample_valuesDevBuf.value().data(),
147  scratch.chi2sNullHypotDevBuf.value().data(),
148  scratch.sum0sNullHypotDevBuf.value().data(),
149  scratch.sumAAsNullHypotDevBuf.value().data(),
150  totalChannels);
151 
152  constexpr uint32_t nchannels_per_block_makeratio = kMaxSamples;
153  constexpr auto nthreads_per_channel =
154  nchannels_per_block_makeratio * (nchannels_per_block_makeratio - 1) / 2; // n(n-1)/2
155  constexpr auto threads_makeratio = nthreads_per_channel * nchannels_per_block_makeratio;
156  auto const blocks_makeratio =
157  cms::alpakatools::divide_up_by(nthreads_per_channel * totalChannels, threads_makeratio);
158  auto workDivTimeMakeRatio1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_makeratio, threads_makeratio);
159  alpaka::exec<Acc1D>(queue,
160  workDivTimeMakeRatio1D,
162  digisDevEB.const_view(),
163  digisDevEE.const_view(),
164  scratch.sample_valuesDevBuf.value().data(),
165  scratch.sample_value_errorsDevBuf.value().data(),
166  scratch.useless_sample_valuesDevBuf.value().data(),
167  scratch.pedestal_numsDevBuf.value().data(),
168  scratch.sumAAsNullHypotDevBuf.value().data(),
169  scratch.sum0sNullHypotDevBuf.value().data(),
170  scratch.tMaxAlphaBetasDevBuf.value().data(),
171  scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
172  scratch.accTimeMaxDevBuf.value().data(),
173  scratch.accTimeWgtDevBuf.value().data(),
174  scratch.tcStateDevBuf.value().data(),
175  paramsDev.const_view(),
176  configParams.timeFitLimitsFirstEB,
177  configParams.timeFitLimitsFirstEE,
178  configParams.timeFitLimitsSecondEB,
179  configParams.timeFitLimitsSecondEE);
180 
181  auto const threads_findamplchi2 = threads_1d;
182  auto const blocks_findamplchi2 = blocks_1d;
183  auto workDivTimeFindAmplChi21D = cms::alpakatools::make_workdiv<Acc1D>(blocks_findamplchi2, threads_findamplchi2);
184  alpaka::exec<Acc1D>(queue,
185  workDivTimeFindAmplChi21D,
187  digisDevEB.const_view(),
188  digisDevEE.const_view(),
189  scratch.sample_valuesDevBuf.value().data(),
190  scratch.sample_value_errorsDevBuf.value().data(),
191  scratch.useless_sample_valuesDevBuf.value().data(),
192  scratch.tMaxAlphaBetasDevBuf.value().data(),
193  scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
194  scratch.accTimeMaxDevBuf.value().data(),
195  scratch.accTimeWgtDevBuf.value().data(),
196  scratch.sumAAsNullHypotDevBuf.value().data(),
197  scratch.sum0sNullHypotDevBuf.value().data(),
198  scratch.chi2sNullHypotDevBuf.value().data(),
199  scratch.tcStateDevBuf.value().data(),
200  scratch.ampMaxAlphaBetaDevBuf.value().data(),
201  scratch.ampMaxErrorDevBuf.value().data(),
202  scratch.timeMaxDevBuf.value().data(),
203  scratch.timeErrorDevBuf.value().data(),
204  paramsDev.const_view());
205 
206  auto const threads_timecorr = 32;
207  auto const blocks_timecorr = cms::alpakatools::divide_up_by(totalChannels, threads_timecorr);
208  auto workDivCorrFinal1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_timecorr, threads_timecorr);
209  alpaka::exec<Acc1D>(queue,
210  workDivCorrFinal1D,
212  digisDevEB.const_view(),
213  digisDevEE.const_view(),
214  uncalibRecHitsDevEB.view(),
215  uncalibRecHitsDevEE.view(),
216  conditionsDev.const_view(),
217  scratch.timeMaxDevBuf.value().data(),
218  scratch.timeErrorDevBuf.value().data(),
219  configParams.timeConstantTermEB,
220  configParams.timeConstantTermEE,
221  configParams.timeNconstEB,
222  configParams.timeNconstEE,
223  configParams.amplitudeThreshEB,
224  configParams.amplitudeThreshEE,
225  configParams.outOfTimeThreshG12pEB,
226  configParams.outOfTimeThreshG12pEE,
227  configParams.outOfTimeThreshG12mEB,
228  configParams.outOfTimeThreshG12mEE,
229  configParams.outOfTimeThreshG61pEB,
230  configParams.outOfTimeThreshG61pEE,
231  configParams.outOfTimeThreshG61mEB,
232  configParams.outOfTimeThreshG61mEE);
233  }
234  }
235 
236 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit
void minimization_procedure(Queue &queue, InputProduct const &digisDevEB, InputProduct const &digisDevEE, OutputProduct &uncalibRecHitsDevEB, OutputProduct &uncalibRecHitsDevEE, EventDataForScratchDevice &scratch, EcalMultifitConditionsDevice const &conditionsDev, ConfigurationParameters const &configParams, uint32_t const totalChannels)
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
PortableCollection< EcalMultifitParametersSoA > EcalMultifitParametersDevice
Vec< Dim2D > Vec2D
Definition: config.h:26
PortableCollection< EcalMultifitConditionsSoA > EcalMultifitConditionsDevice
Eigen::Matrix< data_type, SampleVectorSize, 1 > SampleVector
Eigen::Matrix< char, SampleVectorSize, 1 > SampleGainVector
void launchKernels(Queue &queue, InputProduct const &digisDevEB, InputProduct const &digisDevEE, OutputProduct &uncalibRecHitsDevEB, OutputProduct &uncalibRecHitsDevEE, EcalMultifitConditionsDevice const &conditionsDev, EcalMultifitParametersDevice const &paramsDev, ConfigurationParameters const &configParams)
EcalUncalibratedRecHitDeviceCollection OutputProduct
Eigen::Matrix< char, SampleVectorSize, 1 > BXVectorType
static constexpr int MAXSAMPLES
Definition: EcalDataFrame.h:48