CMS 3D CMS Logo

Classes | Typedefs | Functions
calo::multifit Namespace Reference

Classes

struct  MapMForPM
 
struct  MapSymM
 

Typedefs

template<int NROWS, int NCOLS>
using ColMajorMatrix = Eigen::Matrix< float, NROWS, NCOLS, Eigen::ColMajor >
 
template<int SIZE, typename T = float>
using ColumnVector = Eigen::Matrix< T, SIZE, 1 >
 
template<int NROWS, int NCOLS>
using RowMajorMatrix = Eigen::Matrix< float, NROWS, NCOLS, Eigen::RowMajor >
 
template<int SIZE, typename T = float>
using RowVector = Eigen::Matrix< T, 1, SIZE >
 

Functions

template<typename MatrixType1 , typename MatrixType2 , typename MatrixType3 , typename MatrixType4 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calculateChiSq (MatrixType1 const &matrixL, MatrixType2 const &pulseMatrixView, MatrixType3 const &resultAmplitudesVector, MatrixType4 const &inputAmplitudesView, float &chi2)
 
template<typename MatrixType1 , typename MatrixType2 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition (MatrixType1 &L, MatrixType2 const &M, int const N)
 
template<typename MatrixType1 , typename MatrixType2 , typename VectorType >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets (MatrixType1 &L, MatrixType2 const &M, float b[MatrixType1::stride], VectorType const &Atb, int const N, ColumnVector< MatrixType1::stride, int > const &pulseOffsets)
 
template<typename MatrixType1 , typename MatrixType2 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled (MatrixType1 &L, MatrixType2 const &M)
 
template<typename MatrixType , typename VectorType >
EIGEN_DEVICE_FUNC void fnnls (MatrixType const &AtA, VectorType const &Atb, VectorType &solution, int &npassive, ColumnVector< VectorType::RowsAtCompileTime, int > &pulseOffsets, MapSymM< float, VectorType::RowsAtCompileTime > &matrixL, double eps, const int maxIterations, const int relaxationPeriod, const int relaxationFactor)
 
template<typename MatrixType1 , typename MatrixType2 , typename MatrixType3 >
EIGEN_DEVICE_FUNC void solve_forward_subst_matrix (MatrixType1 &A, MatrixType2 const &pulseMatrixView, MatrixType3 const &matrixL)
 
template<typename MatrixType1 , typename MatrixType2 >
EIGEN_DEVICE_FUNC void solve_forward_subst_vector (float reg_b[MatrixType1::RowsAtCompileTime], MatrixType1 inputAmplitudesView, MatrixType2 matrixL)
 
template<typename MatrixType1 , typename MatrixType2 , typename VectorType >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets (MatrixType1 &L, MatrixType2 const &M, float b[MatrixType1::stride], VectorType const &Atb, int const N, ColumnVector< MatrixType1::stride, int > const &pulseOffsets)
 

Typedef Documentation

◆ ColMajorMatrix

template<int NROWS, int NCOLS>
using calo::multifit::ColMajorMatrix = typedef Eigen::Matrix<float, NROWS, NCOLS, Eigen::ColMajor>

Definition at line 16 of file MultifitComputations.h.

◆ ColumnVector

template<int SIZE, typename T = float>
using calo::multifit::ColumnVector = typedef Eigen::Matrix<T, SIZE, 1>

Definition at line 22 of file MultifitComputations.h.

◆ RowMajorMatrix

template<int NROWS, int NCOLS>
using calo::multifit::RowMajorMatrix = typedef Eigen::Matrix<float, NROWS, NCOLS, Eigen::RowMajor>

Definition at line 19 of file MultifitComputations.h.

◆ RowVector

template<int SIZE, typename T = float>
using calo::multifit::RowVector = typedef Eigen::Matrix<T, 1, SIZE>

Definition at line 25 of file MultifitComputations.h.

Function Documentation

◆ calculateChiSq()

template<typename MatrixType1 , typename MatrixType2 , typename MatrixType3 , typename MatrixType4 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calo::multifit::calculateChiSq ( MatrixType1 const &  matrixL,
MatrixType2 const &  pulseMatrixView,
MatrixType3 const &  resultAmplitudesVector,
MatrixType4 const &  inputAmplitudesView,
float &  chi2 
)

Definition at line 289 of file MultifitComputations.h.

References cms::cudacompat::__ldg(), nano_mu_local_reco_cff::chi2, CMS_UNROLL_LOOP, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), counter, mps_fire::i, and mysort::results.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

293  {
294  // FIXME: this assumes pulses are on columns and samples on rows
295  constexpr auto NPULSES = MatrixType2::ColsAtCompileTime;
296  constexpr auto NSAMPLES = MatrixType2::RowsAtCompileTime;
297 
298  // replace pulseMatrixView * resultAmplitudesVector - inputAmplitudesView
299  // NOTE:
300  float accum[NSAMPLES];
301  {
302  float results[NPULSES];
303 
304  // preload results and permute according to the pulse offsets /////////////// ??? this is not done in ECAL
306  for (int counter = 0; counter < NPULSES; counter++) {
307  results[counter] = resultAmplitudesVector[counter];
308  }
309 
310  // load accum
312  for (int counter = 0; counter < NSAMPLES; counter++)
313  accum[counter] = -inputAmplitudesView(counter);
314 
315  // iterate
316  for (int icol = 0; icol < NPULSES; icol++) {
317  float pm_col[NSAMPLES];
318 
319  // preload a column of pulse matrix
321  for (int counter = 0; counter < NSAMPLES; counter++)
322 #ifdef __CUDA_ARCH__
323  pm_col[counter] = __ldg(&pulseMatrixView.coeffRef(counter, icol));
324 #else
325  pm_col[counter] = pulseMatrixView.coeffRef(counter, icol);
326 #endif
327 
328  // accum
330  for (int counter = 0; counter < NSAMPLES; counter++)
331  accum[counter] += results[icol] * pm_col[counter];
332  }
333  }
334 
335  // compute chi2 and check that there is no rotation
336  // chi2 = matrixDecomposition
337  // .matrixL()
338  // . solve(mapAccum)
339  // .solve(pulseMatrixView * resultAmplitudesVector - inputAmplitudesView)
340  // .squaredNorm();
341 
342  {
343  float reg_L[NSAMPLES];
344  float accumSum = 0;
345 
346  // preload a column and load column 0 of cholesky
348  for (int i = 0; i < NSAMPLES; i++) {
349  reg_L[i] = matrixL(i, 0);
350  }
351 
352  // compute x0 and store it
353  auto x_prev = accum[0] / reg_L[0];
354  accumSum += x_prev * x_prev;
355 
356  // iterate
358  for (int iL = 1; iL < NSAMPLES; iL++) {
359  // update accum
361  for (int counter = iL; counter < NSAMPLES; counter++)
362  accum[counter] -= x_prev * reg_L[counter];
363 
364  // load the next column of cholesky
366  for (int counter = iL; counter < NSAMPLES; counter++)
367  reg_L[counter] = matrixL(counter, iL);
368 
369  // compute the next x for M(iL, icol)
370  x_prev = accum[iL] / reg_L[iL];
371 
372  // store the result value
373  accumSum += x_prev * x_prev;
374  }
375 
376  chi2 = accumSum;
377  }
378  }
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
T __ldg(T const *x)
Definition: cudaCompat.h:137
static std::atomic< unsigned int > counter
results
Definition: mysort.py:8

◆ compute_decomposition()

template<typename MatrixType1 , typename MatrixType2 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calo::multifit::compute_decomposition ( MatrixType1 &  L,
MatrixType2 const &  M,
int const  N 
)

Definition at line 99 of file MultifitComputations.h.

References mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, dttmaxenums::L, N, and mathSSE::sqrt().

101  {
102  auto const sqrtm_0_0 = std::sqrt(M(0, 0));
103  L(0, 0) = sqrtm_0_0;
104  using T = typename MatrixType1::base_type;
105 
106  for (int i = 1; i < N; i++) {
107  T sumsq{0};
108  for (int j = 0; j < i; j++) {
109  T sumsq2{0};
110  auto const m_i_j = M(i, j);
111  for (int k = 0; k < j; ++k)
112  sumsq2 += L(i, k) * L(j, k);
113 
114  auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
115  L(i, j) = value_i_j;
116 
117  sumsq += value_i_j * value_i_j;
118  }
119 
120  auto const l_i_i = std::sqrt(M(i, i) - sumsq);
121  L(i, i) = l_i_i;
122  }
123  }
T sqrt(T t)
Definition: SSEVec.h:19
#define N
Definition: blowfish.cc:9
long double T

◆ compute_decomposition_forwardsubst_with_offsets()

template<typename MatrixType1 , typename MatrixType2 , typename VectorType >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calo::multifit::compute_decomposition_forwardsubst_with_offsets ( MatrixType1 &  L,
MatrixType2 const &  M,
float  b[MatrixType1::stride],
VectorType const &  Atb,
int const  N,
ColumnVector< MatrixType1::stride, int > const &  pulseOffsets 
)

Definition at line 126 of file MultifitComputations.h.

References b, mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, dttmaxenums::L, SiStripPI::max, SiStripPI::min, N, HLT_2024v13_cff::pulseOffsets, mathSSE::sqrt(), and dqmMemoryStats::total.

Referenced by fnnls().

132  {
133  auto const real_0 = pulseOffsets(0);
134  auto const sqrtm_0_0 = std::sqrt(M(real_0, real_0));
135  L(0, 0) = sqrtm_0_0;
136  using T = typename MatrixType1::base_type;
137  b[0] = Atb(real_0) / sqrtm_0_0;
138 
139  for (int i = 1; i < N; i++) {
140  auto const i_real = pulseOffsets(i);
141  T sumsq{0};
142  T total = 0;
143  auto const atb = Atb(i_real);
144  for (int j = 0; j < i; j++) {
145  auto const j_real = pulseOffsets(j);
146  T sumsq2{0};
147  auto const m_i_j = M(std::max(i_real, j_real), std::min(i_real, j_real));
148  for (int k = 0; k < j; ++k)
149  sumsq2 += L(i, k) * L(j, k);
150 
151  auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
152  L(i, j) = value_i_j;
153 
154  sumsq += value_i_j * value_i_j;
155  total += value_i_j * b[j];
156  }
157 
158  auto const l_i_i = std::sqrt(M(i_real, i_real) - sumsq);
159  L(i, i) = l_i_i;
160  b[i] = (atb - total) / l_i_i;
161  }
162  }
T sqrt(T t)
Definition: SSEVec.h:19
#define N
Definition: blowfish.cc:9
double b
Definition: hdecay.h:120
long double T

◆ compute_decomposition_unrolled()

template<typename MatrixType1 , typename MatrixType2 >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calo::multifit::compute_decomposition_unrolled ( MatrixType1 &  L,
MatrixType2 const &  M 
)

Definition at line 73 of file MultifitComputations.h.

References CMS_UNROLL_LOOP, mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, dttmaxenums::L, mathSSE::sqrt(), and riemannFit::stride.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

73  {
74  auto const sqrtm_0_0 = std::sqrt(M(0, 0));
75  L(0, 0) = sqrtm_0_0;
76  using T = typename MatrixType1::base_type;
77 
79  for (int i = 1; i < MatrixType1::stride; i++) {
80  T sumsq{0};
81  for (int j = 0; j < i; j++) {
82  T sumsq2{0};
83  auto const m_i_j = M(i, j);
84  for (int k = 0; k < j; ++k)
85  sumsq2 += L(i, k) * L(j, k);
86 
87  auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
88  L(i, j) = value_i_j;
89 
90  sumsq += value_i_j * value_i_j;
91  }
92 
93  auto const l_i_i = std::sqrt(M(i, i) - sumsq);
94  L(i, i) = l_i_i;
95  }
96  }
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
constexpr uint32_t stride
Definition: HelixFit.h:22
T sqrt(T t)
Definition: SSEVec.h:19
long double T

◆ fnnls()

template<typename MatrixType , typename VectorType >
EIGEN_DEVICE_FUNC void calo::multifit::fnnls ( MatrixType const &  AtA,
VectorType const &  Atb,
VectorType solution,
int &  npassive,
ColumnVector< VectorType::RowsAtCompileTime, int > &  pulseOffsets,
MapSymM< float, VectorType::RowsAtCompileTime > &  matrixL,
double  eps,
const int  maxIterations,
const int  relaxationPeriod,
const int  relaxationFactor 
)

Definition at line 382 of file MultifitComputations.h.

References simBeamSpotPI::alpha, CMS_UNROLL_LOOP, compute_decomposition_forwardsubst_with_offsets(), ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), HLT_2024v13_cff::eps, mps_fire::i, CommonMethods::isnan(), dqmiolumiharvest::j, SiStripPI::max, HLT_2024v13_cff::maxIterations, HLT_2024v13_cff::pulseOffsets, particleFlowDisplacedVertex_cfi::ratio, alignCSCRings::s, edm::swap(), dqmMemoryStats::total, update_decomposition_forwardsubst_with_offsets(), and w().

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

391  { // multiply "eps" by "relaxationFactor"
392  // constants
393  constexpr auto NPULSES = VectorType::RowsAtCompileTime;
394 
395  // to keep track of where to terminate if converged
396  Eigen::Index w_max_idx_prev = 0;
397  float w_max_prev = 0;
398  bool recompute = false;
399 
400  // used throughout
401  VectorType s;
402  float reg_b[NPULSES];
403  //float matrixLStorage[MapSymM<float, NPULSES>::total];
404  //MapSymM<float, NPULSES> matrixL{matrixLStorage};
405 
406  int iter = 0;
407  while (true) {
408  if (iter > 0 || npassive == 0) {
409  auto const nactive = NPULSES - npassive;
410  // exit if there are no more pulses to constrain
411  if (nactive == 0)
412  break;
413 
414  // compute the gradient
415  //w.tail(nactive) = Atb.tail(nactive) - (AtA * solution).tail(nactive);
416  Eigen::Index w_max_idx = 0;
417  float w_max = -std::numeric_limits<float>::max();
418  for (int icol = npassive; icol < NPULSES; icol++) {
419  auto const icol_real = pulseOffsets(icol);
420  auto const atb = Atb(icol_real);
421  float sum = 0;
423  for (int counter = 0; counter < NPULSES; counter++)
424  sum += counter > icol_real ? AtA(counter, icol_real) * solution(counter)
425  : AtA(icol_real, counter) * solution(counter);
426 
427  auto const w = atb - sum;
428  if (w > w_max) {
429  w_max = w;
430  w_max_idx = icol - npassive;
431  }
432  }
433 
434  // check for convergence
435  if (w_max < eps || (w_max_idx == w_max_idx_prev && w_max == w_max_prev))
436  break;
437 
438  if (iter >= maxIterations)
439  break;
440 
441  w_max_prev = w_max;
442  w_max_idx_prev = w_max_idx;
443 
444  // move index to the right part of the vector
445  w_max_idx += npassive;
446 
447  Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(w_max_idx));
448  ++npassive;
449  }
450 
451  // inner loop
452  while (true) {
453  if (npassive == 0)
454  break;
455 
456  //s.head(npassive)
457  //auto const& matrixL =
458  // AtA.topLeftCorner(npassive, npassive)
459  // .llt().matrixL();
460  //.solve(Atb.head(npassive));
461  if (recompute || iter == 0)
462  compute_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
463  else
464  update_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
465 
466  // run backward substituion
467  s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
468  for (int i = npassive - 2; i >= 0; --i) {
469  float total = 0;
470  for (int j = i + 1; j < npassive; j++)
471  total += matrixL(j, i) * s(j);
472 
473  s(i) = (reg_b[i] - total) / matrixL(i, i);
474  }
475 
476  // done if solution values are all positive
477  bool hasNegative = false;
478  bool hasNans = false;
479  for (int counter = 0; counter < npassive; counter++) {
480  auto const s_ii = s(counter);
481  hasNegative |= s_ii <= 0;
482  hasNans |= std::isnan(s_ii);
483  }
484 
485  // FIXME: temporary solution. my cholesky impl is unstable yielding nans
486  // this check removes nans - do not accept solution unless all values
487  // are stable
488  if (hasNans)
489  break;
490  if (!hasNegative) {
491  for (int i = 0; i < npassive; i++) {
492  auto const i_real = pulseOffsets(i);
493  solution(i_real) = s(i);
494  }
495  //solution.head(npassive) = s.head(npassive);
496  recompute = false;
497  break;
498  }
499 
500  // there were negative values -> have to recompute the whole decomp
501  recompute = true;
502 
504  Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
505  for (int i = 0; i < npassive; i++) {
506  if (s[i] <= 0.) {
507  auto const i_real = pulseOffsets(i);
508  auto const ratio = solution[i_real] / (solution[i_real] - s[i]);
509  if (ratio < alpha) {
510  alpha = ratio;
511  alpha_idx = i;
512  alpha_idx_real = i_real;
513  }
514  }
515  }
516 
517  // upadte solution
518  for (int i = 0; i < npassive; i++) {
519  auto const i_real = pulseOffsets(i);
520  solution(i_real) += alpha * (s(i) - solution(i_real));
521  }
522  //solution.head(npassive) += alpha *
523  // (s.head(npassive) - solution.head(npassive));
524  solution[alpha_idx_real] = 0;
525  --npassive;
526 
527  Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(alpha_idx));
528  }
529 
530  // as in cpu
531  ++iter;
532  if (iter % relaxationPeriod == 0)
533  eps *= relaxationFactor;
534  }
535  }
Vec4< T > VectorType
def isnan(num)
T w() const
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(MatrixType1 &L, MatrixType2 const &M, float b[MatrixType1::stride], VectorType const &Atb, int const N, ColumnVector< MatrixType1::stride, int > const &pulseOffsets)
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
void swap(Association< C > &lhs, Association< C > &rhs)
Definition: Association.h:112
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(MatrixType1 &L, MatrixType2 const &M, float b[MatrixType1::stride], VectorType const &Atb, int const N, ColumnVector< MatrixType1::stride, int > const &pulseOffsets)

◆ solve_forward_subst_matrix()

template<typename MatrixType1 , typename MatrixType2 , typename MatrixType3 >
EIGEN_DEVICE_FUNC void calo::multifit::solve_forward_subst_matrix ( MatrixType1 &  A,
MatrixType2 const &  pulseMatrixView,
MatrixType3 const &  matrixL 
)

Definition at line 197 of file MultifitComputations.h.

References cms::cudacompat::__ldg(), A, CMS_UNROLL_LOOP, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and mps_fire::i.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

199  {
200  // FIXME: this assumes pulses are on columns and samples on rows
201  constexpr auto NPULSES = MatrixType2::ColsAtCompileTime;
202  constexpr auto NSAMPLES = MatrixType2::RowsAtCompileTime;
203 
205  for (int icol = 0; icol < NPULSES; icol++) {
206  float reg_b[NSAMPLES];
207  float reg_L[NSAMPLES];
208 
209  // preload a column and load column 0 of cholesky
211  for (int i = 0; i < NSAMPLES; i++) {
212 #ifdef __CUDA_ARCH__
213  // load through the read-only cache
214  reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
215 #else
216  reg_b[i] = pulseMatrixView.coeffRef(i, icol);
217 #endif // __CUDA_ARCH__
218  reg_L[i] = matrixL(i, 0);
219  }
220 
221  // compute x0 and store it
222  auto x_prev = reg_b[0] / reg_L[0];
223  A(0, icol) = x_prev;
224 
225  // iterate
227  for (int iL = 1; iL < NSAMPLES; iL++) {
228  // update accum
230  for (int counter = iL; counter < NSAMPLES; counter++)
231  reg_b[counter] -= x_prev * reg_L[counter];
232 
233  // load the next column of cholesky
235  for (int counter = iL; counter < NSAMPLES; counter++)
236  reg_L[counter] = matrixL(counter, iL);
237 
238  // compute the next x for M(iL, icol)
239  x_prev = reg_b[iL] / reg_L[iL];
240 
241  // store the result value
242  A(iL, icol) = x_prev;
243  }
244  }
245  }
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
T __ldg(T const *x)
Definition: cudaCompat.h:137
Definition: APVGainStruct.h:7

◆ solve_forward_subst_vector()

template<typename MatrixType1 , typename MatrixType2 >
EIGEN_DEVICE_FUNC void calo::multifit::solve_forward_subst_vector ( float  reg_b[MatrixType1::RowsAtCompileTime],
MatrixType1  inputAmplitudesView,
MatrixType2  matrixL 
)

Definition at line 248 of file MultifitComputations.h.

References CMS_UNROLL_LOOP, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and mps_fire::i.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

250  {
251  constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
252 
253  float reg_b_tmp[NSAMPLES];
254  float reg_L[NSAMPLES];
255 
256  // preload a column and load column 0 of cholesky
258  for (int i = 0; i < NSAMPLES; i++) {
259  reg_b_tmp[i] = inputAmplitudesView(i);
260  reg_L[i] = matrixL(i, 0);
261  }
262 
263  // compute x0 and store it
264  auto x_prev = reg_b_tmp[0] / reg_L[0];
265  reg_b[0] = x_prev;
266 
267  // iterate
269  for (int iL = 1; iL < NSAMPLES; iL++) {
270  // update accum
272  for (int counter = iL; counter < NSAMPLES; counter++)
273  reg_b_tmp[counter] -= x_prev * reg_L[counter];
274 
275  // load the next column of cholesky
277  for (int counter = iL; counter < NSAMPLES; counter++)
278  reg_L[counter] = matrixL(counter, iL);
279 
280  // compute the next x for M(iL, icol)
281  x_prev = reg_b_tmp[iL] / reg_L[iL];
282 
283  // store the result value
284  reg_b[iL] = x_prev;
285  }
286  }
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47

◆ update_decomposition_forwardsubst_with_offsets()

template<typename MatrixType1 , typename MatrixType2 , typename VectorType >
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calo::multifit::update_decomposition_forwardsubst_with_offsets ( MatrixType1 &  L,
MatrixType2 const &  M,
float  b[MatrixType1::stride],
VectorType const &  Atb,
int const  N,
ColumnVector< MatrixType1::stride, int > const &  pulseOffsets 
)

Definition at line 165 of file MultifitComputations.h.

References b, mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, dttmaxenums::L, SiStripPI::max, SiStripPI::min, N, HLT_2024v13_cff::pulseOffsets, mathSSE::sqrt(), and dqmMemoryStats::total.

Referenced by fnnls().

171  {
172  using T = typename MatrixType1::base_type;
173  auto const i = N - 1;
174  auto const i_real = pulseOffsets(i);
175  T sumsq{0};
176  T total = 0;
177  for (int j = 0; j < i; j++) {
178  auto const j_real = pulseOffsets(j);
179  T sumsq2{0};
180  auto const m_i_j = M(std::max(i_real, j_real), std::min(i_real, j_real));
181  for (int k = 0; k < j; ++k)
182  sumsq2 += L(i, k) * L(j, k);
183 
184  auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
185  L(i, j) = value_i_j;
186  sumsq += value_i_j * value_i_j;
187 
188  total += value_i_j * b[j];
189  }
190 
191  auto const l_i_i = std::sqrt(M(i_real, i_real) - sumsq);
192  L(i, i) = l_i_i;
193  b[i] = (Atb(i_real) - total) / l_i_i;
194  }
T sqrt(T t)
Definition: SSEVec.h:19
#define N
Definition: blowfish.cc:9
double b
Definition: hdecay.h:120
long double T