CMS 3D CMS Logo

AmplitudeComputationKernels.dev.cc
Go to the documentation of this file.
1 #include <cmath>
2 #include <limits>
3 #include <alpaka/alpaka.hpp>
4 
9 
11 #include "KernelHelpers.h"
13 
15 
16  using namespace ::ecal::multifit;
17 
18  template <typename MatrixType>
19  ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance(EcalPulseCovariance const& pulse_covariance,
20  MatrixType& inverse_cov,
21  SampleVector const& amplitudes) {
22  constexpr auto nsamples = SampleVector::RowsAtCompileTime;
23  constexpr auto npulses = BXVectorType::RowsAtCompileTime;
24 
26  for (unsigned int ipulse = 0; ipulse < npulses; ++ipulse) {
27  auto const amplitude = amplitudes.coeff(ipulse);
28  if (amplitude == 0)
29  continue;
30 
31  // FIXME: ipulse - 5 -> ipulse - firstOffset
32  int bx = ipulse - 5;
33  int first_sample_t = std::max(0, bx + 3);
34  int offset = -3 - bx;
35 
36  auto const value_sq = amplitude * amplitude;
37 
38  for (int col = first_sample_t; col < nsamples; ++col) {
39  for (int row = col; row < nsamples; ++row) {
40  inverse_cov(row, col) += value_sq * pulse_covariance.covval[row + offset][col + offset];
41  }
42  }
43  }
44  }
45 
57  public:
58  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
59  ALPAKA_FN_ACC void operator()(TAcc const& acc,
60  InputProduct::ConstView const& digisDevEB,
61  InputProduct::ConstView const& digisDevEE,
62  OutputProduct::View uncalibRecHitsEB,
63  OutputProduct::View uncalibRecHitsEE,
64  EcalMultifitConditionsDevice::ConstView conditionsDev,
65  ::ecal::multifit::SampleMatrix const* noisecov,
66  ::ecal::multifit::PulseMatrixType const* pulse_matrix,
69  bool* hasSwitchToGain6,
70  bool* hasSwitchToGain1,
71  bool* isSaturated,
72  char* acState,
73  int max_iterations) const {
74  // FIXME: ecal has 10 samples and 10 pulses....
75  // but this needs to be properly treated and renamed everywhere
76  constexpr auto NSAMPLES = SampleMatrix::RowsAtCompileTime;
77  constexpr auto NPULSES = SampleMatrix::ColsAtCompileTime;
78  static_assert(NSAMPLES == NPULSES);
79 
81 
82  auto const elemsPerBlock(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
83 
84  auto const nchannelsEB = digisDevEB.size();
85  auto const nchannels = nchannelsEB + digisDevEE.size();
86  auto const offsetForHashes = conditionsDev.offsetEE();
87 
88  auto const* pulse_covariance = reinterpret_cast<const EcalPulseCovariance*>(conditionsDev.pulseCovariance());
89 
90  // shared memory
91  DataType* shrmem = alpaka::getDynSharedMem<DataType>(acc);
92 
93  // channel
94  for (auto idx : cms::alpakatools::uniform_elements(acc, nchannels)) {
95  if (static_cast<MinimizationState>(acState[idx]) == MinimizationState::Precomputed)
96  continue;
97 
98  auto const elemIdx = idx % elemsPerBlock;
99 
100  // shared memory pointers
101  DataType* shrMatrixLForFnnlsStorage = shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * elemIdx;
102  DataType* shrAtAStorage =
103  shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * (elemIdx + elemsPerBlock);
104 
105  auto* amplitudes =
106  reinterpret_cast<SampleVector*>(idx >= nchannelsEB ? uncalibRecHitsEE.outOfTimeAmplitudes()->data()
107  : uncalibRecHitsEB.outOfTimeAmplitudes()->data());
108  auto* energies = idx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
109  auto* chi2s = idx >= nchannelsEB ? uncalibRecHitsEE.chi2() : uncalibRecHitsEB.chi2();
110 
111  // get the hash
112  int const inputCh = idx >= nchannelsEB ? idx - nchannelsEB : idx;
113  auto const* dids = idx >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
114  auto const did = DetId{dids[inputCh]};
115  auto const isBarrel = did.subdetId() == EcalBarrel;
116  auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
117  : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
118 
119  // inits
120  int npassive = 0;
121 
124  for (int i = 0; i < NPULSES; ++i)
125  pulseOffsets(i) = i;
126 
129  for (int counter = 0; counter < NPULSES; ++counter)
130  resultAmplitudes(counter) = 0;
131 
132  // inits
133  //SampleDecompLLT covariance_decomposition;
134  //SampleMatrix inverse_cov;
135  // SampleVector::Scalar chi2 = 0, chi2_now = 0;
136  float chi2 = 0, chi2_now = 0;
137 
138  // loop for up to max_iterations
139  for (int iter = 0; iter < max_iterations; ++iter) {
140  //inverse_cov = noisecov[idx];
141  //DataType covMatrixStorage[MapSymM<DataType, NSAMPLES>::total];
142  DataType* covMatrixStorage = shrMatrixLForFnnlsStorage;
143  calo::multifit::MapSymM<DataType, NSAMPLES> covMatrix{covMatrixStorage};
144  int counter = 0;
146  for (int col = 0; col < NSAMPLES; ++col) {
148  for (int row = col; row < NSAMPLES; ++row) {
149  covMatrixStorage[counter++] = noisecov[idx].coeffRef(row, col);
150  }
151  }
152  update_covariance(pulse_covariance[hashedId], covMatrix, resultAmplitudes);
153 
154  // compute actual covariance decomposition
155  //covariance_decomposition.compute(inverse_cov);
156  //auto const& matrixL = covariance_decomposition.matrixL();
158  calo::multifit::MapSymM<DataType, NSAMPLES> matrixL{matrixLStorage};
160 
161  // L * A = P
163  calo::multifit::solve_forward_subst_matrix(A, pulse_matrix[idx], matrixL);
164 
165  // L b = s
166  float reg_b[NSAMPLES];
168 
169  // FIXME: shared mem
170  //DataType AtAStorage[MapSymM<DataType, NPULSES>::total];
172  //SampleMatrix AtA;
173  SampleVector Atb;
175  for (int icol = 0; icol < NPULSES; ++icol) {
176  float reg_ai[NSAMPLES];
177 
178  // load column icol
180  for (int counter = 0; counter < NSAMPLES; ++counter)
181  reg_ai[counter] = A(counter, icol);
182 
183  // compute diagoanl
184  float sum = 0.f;
186  for (int counter = 0; counter < NSAMPLES; ++counter)
187  sum += reg_ai[counter] * reg_ai[counter];
188 
189  // store
190  AtA(icol, icol) = sum;
191 
192  // go thru the other columns
194  for (int j = icol + 1; j < NPULSES; ++j) {
195  // load column j
196  float reg_aj[NSAMPLES];
198  for (int counter = 0; counter < NSAMPLES; ++counter)
199  reg_aj[counter] = A(counter, j);
200 
201  // accum
202  float sum = 0.f;
204  for (int counter = 0; counter < NSAMPLES; ++counter)
205  sum += reg_aj[counter] * reg_ai[counter];
206 
207  // store
208  //AtA(icol, j) = sum;
209  AtA(j, icol) = sum;
210  }
211 
212  // Atb accum
213  float sum_atb = 0.f;
215  for (int counter = 0; counter < NSAMPLES; ++counter)
216  sum_atb += reg_ai[counter] * reg_b[counter];
217 
218  // store atb
219  Atb(icol) = sum_atb;
220  }
221 
222  // FIXME: shared mem
223  //DataType matrixLForFnnlsStorage[MapSymM<DataType, NPULSES>::total];
224  calo::multifit::MapSymM<DataType, NPULSES> matrixLForFnnls{shrMatrixLForFnnlsStorage};
225 
227  Atb,
228  //amplitudes[idx],
229  resultAmplitudes,
230  npassive,
231  pulseOffsets,
232  matrixLForFnnls,
233  1e-11,
234  500,
235  16,
236  2);
237 
238  calo::multifit::calculateChiSq(matrixL, pulse_matrix[idx], resultAmplitudes, samples[idx], chi2_now);
239 
240  auto const deltachi2 = chi2_now - chi2;
241  chi2 = chi2_now;
242 
243  if (std::abs(deltachi2) < 1e-3)
244  break;
245  }
246 
247  // store to global output values
248  // FIXME: amplitudes are used in global directly
249  chi2s[inputCh] = chi2;
250  energies[inputCh] = resultAmplitudes(5);
251 
253  for (int counter = 0; counter < NPULSES; ++counter)
254  amplitudes[inputCh](counter) = resultAmplitudes(counter);
255  }
256  }
257  };
258 
260  InputProduct const& digisDevEB,
261  InputProduct const& digisDevEE,
262  OutputProduct& uncalibRecHitsDevEB,
263  OutputProduct& uncalibRecHitsDevEE,
264  EventDataForScratchDevice& scratch,
265  EcalMultifitConditionsDevice const& conditionsDev,
266  ConfigurationParameters const& configParams,
267  uint32_t const totalChannels) {
269  // TODO: configure from python
270  auto threads_min = configParams.kernelMinimizeThreads[0];
271  auto blocks_min = cms::alpakatools::divide_up_by(totalChannels, threads_min);
272 
273  auto workDivMinimize = cms::alpakatools::make_workdiv<Acc1D>(blocks_min, threads_min);
274  alpaka::exec<Acc1D>(queue,
275  workDivMinimize,
276  Kernel_minimize{},
277  digisDevEB.const_view(),
278  digisDevEE.const_view(),
279  uncalibRecHitsDevEB.view(),
280  uncalibRecHitsDevEE.view(),
281  conditionsDev.const_view(),
282  reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
283  reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
284  reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
285  reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
286  scratch.hasSwitchToGain6DevBuf.data(),
287  scratch.hasSwitchToGain1DevBuf.data(),
288  scratch.isSaturatedDevBuf.data(),
289  scratch.acStateDevBuf.data(),
290  50); // maximum number of fit iterations
291  }
292 
293 } // namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit
294 
295 namespace alpaka::trait {
297 
299  template <typename TAcc>
300  struct BlockSharedMemDynSizeBytes<Kernel_minimize, TAcc> {
302  template <typename TVec, typename... TArgs>
303  ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_minimize const&,
304  TVec const& threadsPerBlock,
305  TVec const& elemsPerThread,
306  TArgs const&...) -> std::size_t {
308 
309  // return the amount of dynamic shared memory needed
310  std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
312  sizeof(ScalarType);
313  return bytes;
314  }
315  };
316 } // namespace alpaka::trait
cms::alpakatools::device_buffer< Device, bool[]> hasSwitchToGain6DevBuf
void minimization_procedure(Queue &queue, InputProduct const &digisDevEB, InputProduct const &digisDevEE, OutputProduct &uncalibRecHitsDevEB, OutputProduct &uncalibRecHitsDevEE, EventDataForScratchDevice &scratch, EcalMultifitConditionsDevice const &conditionsDev, ConfigurationParameters const &configParams, uint32_t const totalChannels)
ALPAKA_FN_ACC auto uniform_elements(TAcc const &acc, TArgs... args)
Definition: workdivision.h:311
Eigen::Matrix< data_type, SampleVectorSize, SampleVectorSize > SampleMatrix
cms::alpakatools::device_buffer< Device, SMT[]> noisecovDevBuf
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
double Scalar
Definition: Definitions.h:25
cms::alpakatools::device_buffer< Device, PMT[]> pulse_matrixDevBuf
PortableCollection< EcalMultifitConditionsSoA > EcalMultifitConditionsDevice
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_minimize const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1 &L, MatrixType2 const &M)
cms::alpakatools::device_buffer< Device, bool[]> isSaturatedDevBuf
cms::alpakatools::device_buffer< Device, BXVT[]> activeBXsDevBuf
T ScalarType
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
Eigen::Matrix< data_type, SampleVectorSize, 1 > SampleVector
Eigen::Matrix< data_type, SampleVectorSize, SampleVectorSize > PulseMatrixType
ALPAKA_FN_ACC uint32_t hashedIndexEB(uint32_t id)
Eigen::Matrix< T, SIZE, 1 > ColumnVector
cms::alpakatools::device_buffer< Device, char[]> acStateDevBuf
EIGEN_DEVICE_FUNC void fnnls(MatrixType const &AtA, VectorType const &Atb, VectorType &solution, int &npassive, ColumnVector< VectorType::RowsAtCompileTime, int > &pulseOffsets, MapSymM< float, VectorType::RowsAtCompileTime > &matrixL, double eps, const int maxIterations, const int relaxationPeriod, const int relaxationFactor)
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calculateChiSq(MatrixType1 const &matrixL, MatrixType2 const &pulseMatrixView, MatrixType3 const &resultAmplitudesVector, MatrixType4 const &inputAmplitudesView, float &chi2)
EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1 &A, MatrixType2 const &pulseMatrixView, MatrixType3 const &matrixL)
ALPAKA_FN_ACC uint32_t hashedIndexEE(uint32_t id)
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
EcalUncalibratedRecHitDeviceCollection OutputProduct
ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance(EcalPulseCovariance const &pulse_covariance, MatrixType &inverse_cov, SampleVector const &amplitudes)
float covval[EcalPulseShape::TEMPLATESAMPLES][EcalPulseShape::TEMPLATESAMPLES]
Definition: DetId.h:17
EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime], MatrixType1 inputAmplitudesView, MatrixType2 matrixL)
Eigen::Matrix< double, SampleVectorSize, 1 > SampleVector
bool isSaturated(const Digi &digi, const int &maxADCvalue, int ifirst, int n)
Eigen::Matrix< char, SampleVectorSize, 1 > BXVectorType
static std::atomic< unsigned int > counter
cms::alpakatools::device_buffer< Device, bool[]> hasSwitchToGain1DevBuf
col
Definition: cuy.py:1009
Definition: APVGainStruct.h:7
Eigen::Matrix< float, NROWS, NCOLS, Eigen::ColMajor > ColMajorMatrix
cms::alpakatools::device_buffer< Device, SVT[]> samplesDevBuf
ALPAKA_FN_ACC void operator()(TAcc const &acc, InputProduct::ConstView const &digisDevEB, InputProduct::ConstView const &digisDevEE, OutputProduct::View uncalibRecHitsEB, OutputProduct::View uncalibRecHitsEE, EcalMultifitConditionsDevice::ConstView conditionsDev, ::ecal::multifit::SampleMatrix const *noisecov, ::ecal::multifit::PulseMatrixType const *pulse_matrix, ::ecal::multifit::BXVectorType *bxs, ::ecal::multifit::SampleVector const *samples, bool *hasSwitchToGain6, bool *hasSwitchToGain1, bool *isSaturated, char *acState, int max_iterations) const