3 #include <alpaka/alpaka.hpp> 18 template <
typename MatrixType>
20 MatrixType& inverse_cov,
22 constexpr auto nsamples = SampleVector::RowsAtCompileTime;
23 constexpr auto npulses = BXVectorType::RowsAtCompileTime;
26 for (
unsigned int ipulse = 0; ipulse < npulses; ++ipulse) {
27 auto const amplitude = amplitudes.coeff(ipulse);
38 for (
int col = first_sample_t;
col < nsamples; ++
col) {
39 for (
int row =
col; row < nsamples; ++row) {
58 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
60 InputProduct::ConstView
const& digisDevEB,
61 InputProduct::ConstView
const& digisDevEE,
64 EcalMultifitConditionsDevice::ConstView conditionsDev,
69 bool* hasSwitchToGain6,
70 bool* hasSwitchToGain1,
73 int max_iterations)
const {
76 constexpr auto NSAMPLES = SampleMatrix::RowsAtCompileTime;
77 constexpr auto NPULSES = SampleMatrix::ColsAtCompileTime;
78 static_assert(NSAMPLES == NPULSES);
82 auto const elemsPerBlock(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
84 auto const nchannelsEB = digisDevEB.size();
85 auto const nchannels = nchannelsEB + digisDevEE.size();
86 auto const offsetForHashes = conditionsDev.offsetEE();
88 auto const* pulse_covariance =
reinterpret_cast<const EcalPulseCovariance*
>(conditionsDev.pulseCovariance());
91 DataType* shrmem = alpaka::getDynSharedMem<DataType>(acc);
98 auto const elemIdx =
idx % elemsPerBlock;
106 reinterpret_cast<SampleVector*
>(
idx >= nchannelsEB ? uncalibRecHitsEE.outOfTimeAmplitudes()->data()
107 : uncalibRecHitsEB.outOfTimeAmplitudes()->data());
108 auto* energies =
idx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
109 auto* chi2s =
idx >= nchannelsEB ? uncalibRecHitsEE.chi2() : uncalibRecHitsEB.chi2();
112 int const inputCh =
idx >= nchannelsEB ?
idx - nchannelsEB :
idx;
113 auto const* dids =
idx >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
114 auto const did =
DetId{dids[inputCh]};
124 for (
int i = 0;
i < NPULSES; ++
i)
136 float chi2 = 0, chi2_now = 0;
139 for (
int iter = 0; iter < max_iterations; ++iter) {
142 DataType* covMatrixStorage = shrMatrixLForFnnlsStorage;
146 for (
int col = 0;
col < NSAMPLES; ++
col) {
148 for (
int row =
col; row < NSAMPLES; ++row) {
149 covMatrixStorage[
counter++] = noisecov[
idx].coeffRef(row,
col);
166 float reg_b[NSAMPLES];
175 for (
int icol = 0; icol < NPULSES; ++icol) {
176 float reg_ai[NSAMPLES];
190 AtA(icol, icol) = sum;
194 for (
int j = icol + 1;
j < NPULSES; ++
j) {
196 float reg_aj[NSAMPLES];
240 auto const deltachi2 = chi2_now -
chi2;
249 chi2s[inputCh] =
chi2;
250 energies[inputCh] = resultAmplitudes(5);
267 uint32_t
const totalChannels) {
273 auto workDivMinimize = cms::alpakatools::make_workdiv<Acc1D>(blocks_min, threads_min);
274 alpaka::exec<Acc1D>(
queue,
277 digisDevEB.const_view(),
278 digisDevEE.const_view(),
279 uncalibRecHitsDevEB.view(),
280 uncalibRecHitsDevEE.view(),
281 conditionsDev.const_view(),
283 reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.
pulse_matrixDevBuf.data()),
284 reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.
activeBXsDevBuf.data()),
285 reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.
samplesDevBuf.data()),
299 template <
typename TAcc>
302 template <
typename TVec,
typename... TArgs>
304 TVec
const& threadsPerBlock,
305 TVec
const& elemsPerThread,
306 TArgs
const&...) -> std::size_t {
310 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
cms::alpakatools::device_buffer< Device, bool[]> hasSwitchToGain6DevBuf
void minimization_procedure(Queue &queue, InputProduct const &digisDevEB, InputProduct const &digisDevEE, OutputProduct &uncalibRecHitsDevEB, OutputProduct &uncalibRecHitsDevEE, EventDataForScratchDevice &scratch, EcalMultifitConditionsDevice const &conditionsDev, ConfigurationParameters const &configParams, uint32_t const totalChannels)
Eigen::Matrix< data_type, SampleVectorSize, SampleVectorSize > SampleMatrix
cms::alpakatools::device_buffer< Device, SMT[]> noisecovDevBuf
cms::alpakatools::device_buffer< Device, PMT[]> pulse_matrixDevBuf
PortableCollection< EcalMultifitConditionsSoA > EcalMultifitConditionsDevice
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_minimize const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1 &L, MatrixType2 const &M)
cms::alpakatools::device_buffer< Device, bool[]> isSaturatedDevBuf
cms::alpakatools::device_buffer< Device, BXVT[]> activeBXsDevBuf
Eigen::Matrix< data_type, SampleVectorSize, 1 > SampleVector
Eigen::Matrix< data_type, SampleVectorSize, SampleVectorSize > PulseMatrixType
ALPAKA_FN_ACC uint32_t hashedIndexEB(uint32_t id)
Eigen::Matrix< T, SIZE, 1 > ColumnVector
EcalDigiDeviceCollection InputProduct
cms::alpakatools::device_buffer< Device, char[]> acStateDevBuf
EIGEN_DEVICE_FUNC void fnnls(MatrixType const &AtA, VectorType const &Atb, VectorType &solution, int &npassive, ColumnVector< VectorType::RowsAtCompileTime, int > &pulseOffsets, MapSymM< float, VectorType::RowsAtCompileTime > &matrixL, double eps, const int maxIterations, const int relaxationPeriod, const int relaxationFactor)
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calculateChiSq(MatrixType1 const &matrixL, MatrixType2 const &pulseMatrixView, MatrixType3 const &resultAmplitudesVector, MatrixType4 const &inputAmplitudesView, float &chi2)
EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1 &A, MatrixType2 const &pulseMatrixView, MatrixType3 const &matrixL)
ALPAKA_FN_ACC uint32_t hashedIndexEE(uint32_t id)
Abs< T >::type abs(const T &t)
EcalUncalibratedRecHitDeviceCollection OutputProduct
ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance(EcalPulseCovariance const &pulse_covariance, MatrixType &inverse_cov, SampleVector const &litudes)
float covval[EcalPulseShape::TEMPLATESAMPLES][EcalPulseShape::TEMPLATESAMPLES]
EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime], MatrixType1 inputAmplitudesView, MatrixType2 matrixL)
Eigen::Matrix< double, SampleVectorSize, 1 > SampleVector
bool isSaturated(const Digi &digi, const int &maxADCvalue, int ifirst, int n)
Eigen::Matrix< char, SampleVectorSize, 1 > BXVectorType
static std::atomic< unsigned int > counter
cms::alpakatools::device_buffer< Device, bool[]> hasSwitchToGain1DevBuf
std::array< uint32_t, 3 > kernelMinimizeThreads
Eigen::Matrix< float, NROWS, NCOLS, Eigen::ColMajor > ColMajorMatrix
cms::alpakatools::device_buffer< Device, SVT[]> samplesDevBuf
ALPAKA_FN_ACC void operator()(TAcc const &acc, InputProduct::ConstView const &digisDevEB, InputProduct::ConstView const &digisDevEE, OutputProduct::View uncalibRecHitsEB, OutputProduct::View uncalibRecHitsEE, EcalMultifitConditionsDevice::ConstView conditionsDev, ::ecal::multifit::SampleMatrix const *noisecov, ::ecal::multifit::PulseMatrixType const *pulse_matrix, ::ecal::multifit::BXVectorType *bxs, ::ecal::multifit::SampleVector const *samples, bool *hasSwitchToGain6, bool *hasSwitchToGain1, bool *isSaturated, char *acState, int max_iterations) const