1 #ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
2 #define DataFormats_CaloRecHit_interface_MultifitComputations_h
15 template <
int NROWS,
int NCOLS>
18 template <
int NROWS,
int NCOLS>
21 template <
int SIZE,
typename T =
float>
24 template <
int SIZE,
typename T =
float>
28 template <
typename T,
int Str
ide,
int Order = Eigen::ColMajor>
33 static constexpr
int total = Stride * (Stride + 1) / 2;
34 static constexpr
int stride = Stride;
39 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC
T const&
operator()(
int const row,
int const col)
const {
40 auto const tmp = (Stride -
col) * (Stride -
col + 1) / 2;
45 template <
typename U = T>
48 auto const tmp = (Stride -
col) * (Stride -
col + 1) / 2;
72 template <
typename MatrixType1,
typename MatrixType2>
74 auto const sqrtm_0_0 =
std::sqrt(M(0, 0));
76 using T =
typename MatrixType1::base_type;
81 for (
int j = 0;
j <
i;
j++) {
83 auto const m_i_j = M(
i,
j);
84 for (
int k = 0;
k <
j; ++
k)
85 sumsq2 +=
L(
i,
k) *
L(
j,
k);
87 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
90 sumsq += value_i_j * value_i_j;
98 template <
typename MatrixType1,
typename MatrixType2>
100 MatrixType2
const& M,
102 auto const sqrtm_0_0 =
std::sqrt(M(0, 0));
104 using T =
typename MatrixType1::base_type;
106 for (
int i = 1;
i <
N;
i++) {
108 for (
int j = 0;
j <
i;
j++) {
110 auto const m_i_j = M(
i,
j);
111 for (
int k = 0;
k <
j; ++
k)
112 sumsq2 +=
L(
i,
k) *
L(
j,
k);
114 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
117 sumsq += value_i_j * value_i_j;
125 template <
typename MatrixType1,
typename MatrixType2,
typename VectorType>
128 MatrixType2
const& M,
133 auto const real_0 = pulseOffsets(0);
134 auto const sqrtm_0_0 =
std::sqrt(M(real_0, real_0));
136 using T =
typename MatrixType1::base_type;
137 b[0] = Atb(real_0) / sqrtm_0_0;
139 for (
int i = 1;
i <
N;
i++) {
140 auto const i_real = pulseOffsets(
i);
143 auto const atb = Atb(i_real);
144 for (
int j = 0;
j <
i;
j++) {
145 auto const j_real = pulseOffsets(
j);
148 for (
int k = 0;
k <
j; ++
k)
149 sumsq2 +=
L(
i,
k) *
L(
j,
k);
151 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
154 sumsq += value_i_j * value_i_j;
158 auto const l_i_i =
std::sqrt(M(i_real, i_real) - sumsq);
164 template <
typename MatrixType1,
typename MatrixType2,
typename VectorType>
167 MatrixType2
const& M,
172 using T =
typename MatrixType1::base_type;
173 auto const i =
N - 1;
174 auto const i_real = pulseOffsets(
i);
177 for (
int j = 0;
j <
i;
j++) {
178 auto const j_real = pulseOffsets(
j);
181 for (
int k = 0;
k <
j; ++
k)
182 sumsq2 +=
L(
i,
k) *
L(
j,
k);
184 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
186 sumsq += value_i_j * value_i_j;
191 auto const l_i_i =
std::sqrt(M(i_real, i_real) - sumsq);
193 b[
i] = (Atb(i_real) -
total) / l_i_i;
196 template <
typename MatrixType1,
typename MatrixType2,
typename MatrixType3>
198 MatrixType2
const& pulseMatrixView,
199 MatrixType3
const& matrixL) {
201 constexpr
auto NPULSES = MatrixType2::ColsAtCompileTime;
202 constexpr
auto NSAMPLES = MatrixType2::RowsAtCompileTime;
205 for (
int icol = 0; icol < NPULSES; icol++) {
206 float reg_b[NSAMPLES];
207 float reg_L[NSAMPLES];
211 for (
int i = 0;
i < NSAMPLES;
i++) {
214 reg_b[
i] =
__ldg(&pulseMatrixView.coeffRef(
i, icol));
216 reg_b[
i] = pulseMatrixView.coeffRef(
i, icol);
217 #endif // __CUDA_ARCH__
218 reg_L[
i] = matrixL(
i, 0);
222 auto x_prev = reg_b[0] / reg_L[0];
227 for (
int iL = 1; iL < NSAMPLES; iL++) {
239 x_prev = reg_b[iL] / reg_L[iL];
242 A(iL, icol) = x_prev;
247 template <
typename MatrixType1,
typename MatrixType2>
249 MatrixType1 inputAmplitudesView,
250 MatrixType2 matrixL) {
251 constexpr
auto NSAMPLES = MatrixType1::RowsAtCompileTime;
253 float reg_b_tmp[NSAMPLES];
254 float reg_L[NSAMPLES];
258 for (
int i = 0;
i < NSAMPLES;
i++) {
259 reg_b_tmp[
i] = inputAmplitudesView(
i);
260 reg_L[
i] = matrixL(
i, 0);
264 auto x_prev = reg_b_tmp[0] / reg_L[0];
269 for (
int iL = 1; iL < NSAMPLES; iL++) {
281 x_prev = reg_b_tmp[iL] / reg_L[iL];
288 template <
typename MatrixType1,
typename MatrixType2,
typename MatrixType3,
typename MatrixType4>
289 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC
void calculateChiSq(MatrixType1
const& matrixL,
290 MatrixType2
const& pulseMatrixView,
291 MatrixType3
const& resultAmplitudesVector,
292 MatrixType4
const& inputAmplitudesView,
295 constexpr
auto NPULSES = MatrixType2::ColsAtCompileTime;
296 constexpr
auto NSAMPLES = MatrixType2::RowsAtCompileTime;
300 float accum[NSAMPLES];
316 for (
int icol = 0; icol < NPULSES; icol++) {
317 float pm_col[NSAMPLES];
343 float reg_L[NSAMPLES];
348 for (
int i = 0;
i < NSAMPLES;
i++) {
349 reg_L[
i] = matrixL(
i, 0);
353 auto x_prev = accum[0] / reg_L[0];
354 accumSum += x_prev * x_prev;
358 for (
int iL = 1; iL < NSAMPLES; iL++) {
370 x_prev = accum[iL] / reg_L[iL];
373 accumSum += x_prev * x_prev;
381 template <
typename MatrixType,
typename VectorType>
382 EIGEN_DEVICE_FUNC
void fnnls(MatrixType
const& AtA,
390 const int relaxationPeriod,
391 const int relaxationFactor) {
393 constexpr
auto NPULSES = VectorType::RowsAtCompileTime;
396 Eigen::Index w_max_idx_prev = 0;
397 float w_max_prev = 0;
398 bool recompute =
false;
402 float reg_b[NPULSES];
408 if (iter > 0 || npassive == 0) {
409 auto const nactive = NPULSES - npassive;
416 Eigen::Index w_max_idx;
418 for (
int icol = npassive; icol < NPULSES; icol++) {
419 auto const icol_real = pulseOffsets(icol);
420 auto const atb = Atb(icol_real);
427 auto const w = atb - sum;
430 w_max_idx = icol - npassive;
435 if (w_max <
eps || (w_max_idx == w_max_idx_prev && w_max == w_max_prev))
442 w_max_idx_prev = w_max_idx;
445 w_max_idx += npassive;
461 if (recompute || iter == 0)
467 s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
468 for (
int i = npassive - 2;
i >= 0; --
i) {
470 for (
int j =
i + 1;
j < npassive;
j++)
477 bool hasNegative =
false;
478 bool hasNans =
false;
481 hasNegative |= s_ii <= 0;
491 for (
int i = 0;
i < npassive;
i++) {
492 auto const i_real = pulseOffsets(
i);
493 solution(i_real) =
s(
i);
504 Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
505 for (
int i = 0;
i < npassive;
i++) {
507 auto const i_real = pulseOffsets(
i);
508 auto const ratio = solution[i_real] / (solution[i_real] -
s[
i]);
512 alpha_idx_real = i_real;
518 for (
int i = 0;
i < npassive;
i++) {
519 auto const i_real = pulseOffsets(
i);
520 solution(i_real) +=
alpha * (
s(
i) - solution(i_real));
524 solution[alpha_idx_real] = 0;
532 if (iter % relaxationPeriod == 0)
533 eps *= relaxationFactor;
540 #endif // DataFormats_CaloRecHit_interface_MultifitComputations_h