1 #ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
2 #define DataFormats_CaloRecHit_interface_MultifitComputations_h
13 template <
int NROWS,
int NCOLS>
16 template <
int NROWS,
int NCOLS>
19 template <
int SIZE,
typename T =
float>
22 template <
int SIZE,
typename T =
float>
26 template <
typename T,
int Str
ide,
int Order = Eigen::ColMajor>
31 static constexpr
int total = Stride * (Stride + 1) / 2;
32 static constexpr
int stride = Stride;
37 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC
T const&
operator()(
int const row,
int const col)
const {
38 auto const tmp = (Stride -
col) * (Stride -
col + 1) / 2;
43 template <
typename U = T>
46 auto const tmp = (Stride -
col) * (Stride -
col + 1) / 2;
70 template <
typename MatrixType1,
typename MatrixType2>
72 auto const sqrtm_0_0 =
std::sqrt(M(0, 0));
74 using T =
typename MatrixType1::base_type;
77 for (
int i = 1;
i < MatrixType1::stride;
i++) {
79 for (
int j = 0;
j <
i;
j++) {
81 auto const m_i_j = M(
i,
j);
82 for (
int k = 0;
k <
j; ++
k)
83 sumsq2 +=
L(
i,
k) *
L(
j,
k);
85 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
88 sumsq += value_i_j * value_i_j;
96 template <
typename MatrixType1,
typename MatrixType2>
100 auto const sqrtm_0_0 =
std::sqrt(M(0, 0));
102 using T =
typename MatrixType1::base_type;
104 for (
int i = 1;
i <
N;
i++) {
106 for (
int j = 0;
j <
i;
j++) {
108 auto const m_i_j = M(
i,
j);
109 for (
int k = 0;
k <
j; ++
k)
110 sumsq2 +=
L(
i,
k) *
L(
j,
k);
112 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
115 sumsq += value_i_j * value_i_j;
123 template <
typename MatrixType1,
typename MatrixType2,
typename VectorType>
126 MatrixType2
const& M,
127 float b[MatrixType1::stride],
131 auto const real_0 = pulseOffsets(0);
132 auto const sqrtm_0_0 =
std::sqrt(M(real_0, real_0));
134 using T =
typename MatrixType1::base_type;
135 b[0] = Atb(real_0) / sqrtm_0_0;
137 for (
int i = 1;
i <
N;
i++) {
138 auto const i_real = pulseOffsets(
i);
141 auto const atb = Atb(i_real);
142 for (
int j = 0;
j <
i;
j++) {
143 auto const j_real = pulseOffsets(
j);
146 for (
int k = 0;
k <
j; ++
k)
147 sumsq2 +=
L(
i,
k) *
L(
j,
k);
149 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
152 sumsq += value_i_j * value_i_j;
156 auto const l_i_i =
std::sqrt(M(i_real, i_real) - sumsq);
162 template <
typename MatrixType1,
typename MatrixType2,
typename VectorType>
165 MatrixType2
const& M,
166 float b[MatrixType1::stride],
170 using T =
typename MatrixType1::base_type;
171 auto const i =
N - 1;
172 auto const i_real = pulseOffsets(
i);
175 for (
int j = 0;
j <
i;
j++) {
176 auto const j_real = pulseOffsets(
j);
179 for (
int k = 0;
k <
j; ++
k)
180 sumsq2 +=
L(
i,
k) *
L(
j,
k);
182 auto const value_i_j = (m_i_j - sumsq2) /
L(
j,
j);
184 sumsq += value_i_j * value_i_j;
189 auto const l_i_i =
std::sqrt(M(i_real, i_real) - sumsq);
191 b[
i] = (Atb(i_real) -
total) / l_i_i;
194 template <
typename MatrixType1,
typename MatrixType2,
typename MatrixType3>
196 MatrixType2
const& pulseMatrixView,
197 MatrixType3
const& matrixL) {
199 constexpr
auto NPULSES = MatrixType2::ColsAtCompileTime;
200 constexpr
auto NSAMPLES = MatrixType2::RowsAtCompileTime;
203 for (
int icol = 0; icol < NPULSES; icol++) {
204 float reg_b[NSAMPLES];
205 float reg_L[NSAMPLES];
209 for (
int i = 0;
i < NSAMPLES;
i++) {
212 reg_b[
i] =
__ldg(&pulseMatrixView.coeffRef(
i, icol));
214 reg_b[
i] = pulseMatrixView.coeffRef(
i, icol);
215 #endif // __CUDA_ARCH__
216 reg_L[
i] = matrixL(
i, 0);
220 auto x_prev = reg_b[0] / reg_L[0];
225 for (
int iL = 1; iL < NSAMPLES; iL++) {
237 x_prev = reg_b[iL] / reg_L[iL];
240 A(iL, icol) = x_prev;
245 template <
typename MatrixType1,
typename MatrixType2>
247 MatrixType1 inputAmplitudesView,
248 MatrixType2 matrixL) {
249 constexpr
auto NSAMPLES = MatrixType1::RowsAtCompileTime;
251 float reg_b_tmp[NSAMPLES];
252 float reg_L[NSAMPLES];
256 for (
int i = 0;
i < NSAMPLES;
i++) {
257 reg_b_tmp[
i] = inputAmplitudesView(
i);
258 reg_L[
i] = matrixL(
i, 0);
262 auto x_prev = reg_b_tmp[0] / reg_L[0];
267 for (
int iL = 1; iL < NSAMPLES; iL++) {
279 x_prev = reg_b_tmp[iL] / reg_L[iL];
286 template <
typename MatrixType1,
typename MatrixType2,
typename MatrixType3,
typename MatrixType4>
287 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC
void calculateChiSq(MatrixType1
const& matrixL,
288 MatrixType2
const& pulseMatrixView,
289 MatrixType3
const& resultAmplitudesVector,
290 MatrixType4
const& inputAmplitudesView,
293 constexpr
auto NPULSES = MatrixType2::ColsAtCompileTime;
294 constexpr
auto NSAMPLES = MatrixType2::RowsAtCompileTime;
298 float accum[NSAMPLES];
314 for (
int icol = 0; icol < NPULSES; icol++) {
315 float pm_col[NSAMPLES];
341 float reg_L[NSAMPLES];
346 for (
int i = 0;
i < NSAMPLES;
i++) {
347 reg_L[
i] = matrixL(
i, 0);
351 auto x_prev = accum[0] / reg_L[0];
352 accumSum += x_prev * x_prev;
356 for (
int iL = 1; iL < NSAMPLES; iL++) {
368 x_prev = accum[iL] / reg_L[iL];
371 accumSum += x_prev * x_prev;
379 template <
typename MatrixType,
typename VectorType>
380 EIGEN_DEVICE_FUNC
void fnnls(MatrixType
const& AtA,
388 const int relaxationPeriod,
389 const int relaxationFactor) {
391 constexpr
auto NPULSES = VectorType::RowsAtCompileTime;
394 Eigen::Index w_max_idx_prev = 0;
395 float w_max_prev = 0;
396 bool recompute =
false;
400 float reg_b[NPULSES];
406 if (iter > 0 || npassive == 0) {
407 auto const nactive = NPULSES - npassive;
414 Eigen::Index w_max_idx;
416 for (
int icol = npassive; icol < NPULSES; icol++) {
417 auto const icol_real = pulseOffsets(icol);
418 auto const atb = Atb(icol_real);
425 auto const w = atb - sum;
428 w_max_idx = icol - npassive;
433 if (w_max < eps || (w_max_idx == w_max_idx_prev && w_max == w_max_prev))
440 w_max_idx_prev = w_max_idx;
443 w_max_idx += npassive;
459 if (recompute || iter == 0)
465 s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
466 for (
int i = npassive - 2;
i >= 0; --
i) {
468 for (
int j =
i + 1;
j < npassive;
j++)
475 bool hasNegative =
false;
476 bool hasNans =
false;
479 hasNegative |= s_ii <= 0;
489 for (
int i = 0;
i < npassive;
i++) {
490 auto const i_real = pulseOffsets(
i);
491 solution(i_real) =
s(
i);
502 Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
503 for (
int i = 0;
i < npassive;
i++) {
505 auto const i_real = pulseOffsets(
i);
506 auto const ratio = solution[i_real] / (solution[i_real] -
s[
i]);
510 alpha_idx_real = i_real;
516 for (
int i = 0;
i < npassive;
i++) {
517 auto const i_real = pulseOffsets(
i);
518 solution(i_real) +=
alpha * (
s(
i) - solution(i_real));
522 solution[alpha_idx_real] = 0;
530 if (iter % relaxationPeriod == 0)
531 eps *= relaxationFactor;
538 #endif // DataFormats_CaloRecHit_interface_MultifitComputations_h