CMS 3D CMS Logo

Classes | Typedefs | Functions | Variables
Matriplex Namespace Reference

Classes

struct  CholeskyInverter
 
struct  CholeskyInverter< T, 3, N >
 
struct  CholeskyInverterSym
 
struct  CholeskyInverterSym< T, 3, N >
 
struct  CramerInverter
 
struct  CramerInverter< T, 2, N >
 
struct  CramerInverter< T, 3, N >
 
struct  CramerInverterSym
 
struct  CramerInverterSym< T, 2, N >
 
struct  CramerInverterSym< T, 3, N >
 
class  MatriplexVector
 
struct  MultiplyCls
 
struct  MultiplyCls< T, 3, N >
 
struct  MultiplyCls< T, 6, N >
 
struct  SymMultiplyCls
 
struct  SymMultiplyCls< T, 3, N >
 
struct  SymMultiplyCls< T, 6, N >
 

Typedefs

typedef int idx_t
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
using MPlex = Matriplex< T, D1, D2, N >
 
template<typename T , idx_t D, idx_t N>
using MPlexSym = MatriplexSym< T, D, N >
 
template<class MP >
using MPlexVec = MatriplexVector< MP >
 

Functions

template<typename T , idx_t D, idx_t N>
class __attribute__ ((aligned(32))) MatriplexSym
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
class __attribute__ ((aligned(32))) Matriplex
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > abs (const MPlex< T, D1, D2, N > &a)
 
void align_check (const char *pref, void *adr)
 
void * aligned_alloc64 (std::size_t size)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > cos (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > hypot (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D, idx_t N>
void invertCholesky (MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCholesky (MPlex< T, D, D, N > &A)
 
template<typename T , idx_t D, idx_t N>
void invertCholeskySym (MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCholeskySym (MPlexSym< T, D, N > &A)
 
template<typename T , idx_t D, idx_t N>
void invertCramer (MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCramer (MPlex< T, D, D, N > &A, double *determ=nullptr)
 
template<typename T , idx_t D, idx_t N>
void invertCramerSym (MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCramerSym (MPlexSym< T, D, N > &A, double *determ=nullptr)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > max (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > min (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
void min_max (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b, MPlex< T, D1, D2, N > &min, MPlex< T, D1, D2, N > &max)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiply (const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlexSym< T, D, N > &A, const MPlexSym< T, D, N > &B, MPlex< T, D, D, N > &C)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiply3in (MPlexVec< MPlex< T, D1, D2, N >> &A, MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiplyGeneral (const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiplyGeneral (const MPlex< T, D1, D2, N > &A, const MPlex< T, D2, D3, N > &B, MPlex< T, D1, D3, N > &C)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (T a, const MPlex< T, D1, D2, N > &b)
 
constexpr std::size_t round_up_align64 (std::size_t size)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sin (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
void sincos (const MPlex< T, D1, D2, N > &a, MPlex< T, D1, D2, N > &s, MPlex< T, D1, D2, N > &c)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sqr (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sqrt (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > tan (const MPlex< T, D1, D2, N > &a)
 

Variables

const idx_t gSymOffsets [7][36]
 

Typedef Documentation

◆ idx_t

typedef int Matriplex::idx_t

Definition at line 98 of file MatriplexCommon.h.

◆ MPlex

template<typename T , idx_t D1, idx_t D2, idx_t N>
using Matriplex::MPlex = typedef Matriplex<T, D1, D2, N>

Definition at line 327 of file Matriplex.h.

◆ MPlexSym

template<typename T , idx_t D, idx_t N>
using Matriplex::MPlexSym = typedef MatriplexSym<T, D, N>

Definition at line 279 of file MatriplexSym.h.

◆ MPlexVec

template<class MP >
using Matriplex::MPlexVec = typedef MatriplexVector<MP>

Definition at line 48 of file MatriplexVector.h.

Function Documentation

◆ __attribute__() [1/2]

template<typename T , idx_t D, idx_t N>
class Matriplex::__attribute__ ( (aligned(32))  )

no. of matrix rows

no. of matrix columns

no of elements: lower triangle

size of the whole matriplex

Definition at line 25 of file MatriplexSym.h.

References a, PVValHelper::add(), ASSUME_ALIGNED, b, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), filterCSVwithJSON::copy, ztail::d, gSymOffsets, mps_fire::i, recoMuon::in, dqmiolumiharvest::j, dqmdumpme::k, visualization-live-secondInstance_cfg::m, N, dqmiodumpmetadata::n, hgchebackDigitizer_cfi::noise, unpackBuffers-CaloStage1::offsets, operator=(), operator[](), AlCaHLTBitMon_ParallelJobs::p, alignCSCRings::s, pfClustersFromCombinedCaloHF_cfi::scale, TrackRefitter_38T_cff::src, electronEcalRecHitIsolationLcone_cfi::subtract, submitPVValidationJobs::t, createJobs::tmp, mitigatedMETSequence_cff::U, interactiveExample::ui, findQualityFiles::v, and geometryCSVtoXML::xx.

25  {
26  public:
27  typedef T value_type;
28 
30  static constexpr int kRows = D;
32  static constexpr int kCols = D;
34  static constexpr int kSize = (D + 1) * D / 2;
36  static constexpr int kTotSize = N * kSize;
37 
38  T fArray[kTotSize];
39 
40  MatriplexSym() {}
41  MatriplexSym(T v) { setVal(v); }
42 
43  idx_t plexSize() const { return N; }
44 
45  void setVal(T v) {
46  for (idx_t i = 0; i < kTotSize; ++i) {
47  fArray[i] = v;
48  }
49  }
50 
51  void add(const MatriplexSym& v) {
52  for (idx_t i = 0; i < kTotSize; ++i) {
53  fArray[i] += v.fArray[i];
54  }
55  }
56 
57  void scale(T scale) {
58  for (idx_t i = 0; i < kTotSize; ++i) {
59  fArray[i] *= scale;
60  }
61  }
62 
63  T operator[](idx_t xx) const { return fArray[xx]; }
64  T& operator[](idx_t xx) { return fArray[xx]; }
65 
66  const idx_t* offsets() const { return gSymOffsets[D]; }
67  idx_t off(idx_t i) const { return gSymOffsets[D][i]; }
68 
69  const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[off(i * D + j) * N + n]; }
70 
71  T& At(idx_t n, idx_t i, idx_t j) { return fArray[off(i * D + j) * N + n]; }
72 
73  T& operator()(idx_t n, idx_t i, idx_t j) { return At(n, i, j); }
74  const T& operator()(idx_t n, idx_t i, idx_t j) const { return constAt(n, i, j); }
75 
76  MatriplexSym& operator=(const MatriplexSym& m) {
77  memcpy(fArray, m.fArray, sizeof(T) * kTotSize);
78  return *this;
79  }
80 
81  MatriplexSym(const MatriplexSym& m) = default;
82 
83  void copySlot(idx_t n, const MatriplexSym& m) {
84  for (idx_t i = n; i < kTotSize; i += N) {
85  fArray[i] = m.fArray[i];
86  }
87  }
88 
89  void copyIn(idx_t n, const T* arr) {
90  for (idx_t i = n; i < kTotSize; i += N) {
91  fArray[i] = *(arr++);
92  }
93  }
94 
95  void copyIn(idx_t n, const MatriplexSym& m, idx_t in) {
96  for (idx_t i = n; i < kTotSize; i += N, in += N) {
97  fArray[i] = m[in];
98  }
99  }
100 
101  void copy(idx_t n, idx_t in) {
102  for (idx_t i = n; i < kTotSize; i += N, in += N) {
103  fArray[i] = fArray[in];
104  }
105  }
106 
107 #if defined(AVX512_INTRINSICS)
108 
109  template <typename U>
110  void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
111  //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
112 
113  const __m512 src = {0};
114  const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
115 
116  for (int i = 0; i < kSize; ++i, ++arr) {
117  //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
118 
119  __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
120  _mm512_mask_store_ps(&fArray[i * N], k, reg);
121  }
122  }
123 
124  // Experimental methods, slurpIn() seems to be at least as fast.
125  // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
126 
127  void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
128  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
129 
130  for (int i = 0; i < N; ++i) {
131  __m512 reg = _mm512_load_ps(arr + vi[i]);
132  _mm512_store_ps((void*)(tmp + 64 * i), reg);
133  }
134 
135  for (int i = 0; i < kSize; ++i) {
136  __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
137  _mm512_store_ps(&fArray[i * N], reg);
138  }
139  }
140 
141  void Contaginate(const char* arr, int vi[N], const char* tmp) {
142  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
143 
144  for (int i = 0; i < N; ++i) {
145  __m512 reg = _mm512_load_ps(arr + vi[i]);
146  _mm512_store_ps((void*)(tmp + 64 * i), reg);
147  }
148  }
149 
150  void Plexify(const char* tmp, __m512i& ui) {
151  for (int i = 0; i < kSize; ++i) {
152  __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
153  _mm512_store_ps(&fArray[i * N], reg);
154  }
155  }
156 
157 #elif defined(AVX2_INTRINSICS)
158 
159  template <typename U>
160  void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
161  const __m256 src = {0};
162 
163  __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
164  __m256i k_sel = _mm256_set1_epi32(N_proc);
165  __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
166 
167  k = k_master;
168  for (int i = 0; i < kSize; ++i, ++arr) {
169  __m256 reg = _mm256_mask_i32gather_ps(src, arr, vi, (__m256)k, sizeof(U));
170  // Restore mask (docs say gather clears it but it doesn't seem to).
171  k = k_master;
172  _mm256_maskstore_ps(&fArray[i * N], k, reg);
173  }
174  }
175 
176 #else
177 
178  void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
179  // Separate N_proc == N case (gains about 7% in fit test).
180  if (N_proc == N) {
181  for (int i = 0; i < kSize; ++i) {
182  for (int j = 0; j < N; ++j) {
183  fArray[i * N + j] = *(arr + i + vi[j]);
184  }
185  }
186  } else {
187  for (int i = 0; i < kSize; ++i) {
188  for (int j = 0; j < N_proc; ++j) {
189  fArray[i * N + j] = *(arr + i + vi[j]);
190  }
191  }
192  }
193  }
194 
195 #endif
196 
197  void copyOut(idx_t n, T* arr) const {
198  for (idx_t i = n; i < kTotSize; i += N) {
199  *(arr++) = fArray[i];
200  }
201  }
202 
203  void setDiagonal3x3(idx_t n, T d) {
204  T* p = fArray + n;
205 
206  p[0 * N] = d;
207  p[1 * N] = 0;
208  p[2 * N] = d;
209  p[3 * N] = 0;
210  p[4 * N] = 0;
211  p[5 * N] = d;
212  }
213 
214  MatriplexSym& subtract(const MatriplexSym& a, const MatriplexSym& b) {
215  // Does *this = a - b;
216 
217 #pragma omp simd
218  for (idx_t i = 0; i < kTotSize; ++i) {
219  fArray[i] = a.fArray[i] - b.fArray[i];
220  }
221 
222  return *this;
223  }
224 
225  // ==================================================================
226  // Operations specific to Kalman fit in 6 parameter space
227  // ==================================================================
228 
229  void addNoiseIntoUpperLeft3x3(T noise) {
230  T* p = fArray;
231  ASSUME_ALIGNED(p, 64);
232 
233 #pragma omp simd
234  for (idx_t n = 0; n < N; ++n) {
235  p[0 * N + n] += noise;
236  p[2 * N + n] += noise;
237  p[5 * N + n] += noise;
238  }
239  }
240 
241  void invertUpperLeft3x3() {
242  typedef T TT;
243 
244  T* a = fArray;
245  ASSUME_ALIGNED(a, 64);
246 
247 #pragma omp simd
248  for (idx_t n = 0; n < N; ++n) {
249  const TT c00 = a[2 * N + n] * a[5 * N + n] - a[4 * N + n] * a[4 * N + n];
250  const TT c01 = a[4 * N + n] * a[3 * N + n] - a[1 * N + n] * a[5 * N + n];
251  const TT c02 = a[1 * N + n] * a[4 * N + n] - a[2 * N + n] * a[3 * N + n];
252  const TT c11 = a[5 * N + n] * a[0 * N + n] - a[3 * N + n] * a[3 * N + n];
253  const TT c12 = a[3 * N + n] * a[1 * N + n] - a[4 * N + n] * a[0 * N + n];
254  const TT c22 = a[0 * N + n] * a[2 * N + n] - a[1 * N + n] * a[1 * N + n];
255 
256  // Force determinant calculation in double precision.
257  const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[3 * N + n] * c02;
258  const TT s = TT(1) / det;
259 
260  a[0 * N + n] = s * c00;
261  a[1 * N + n] = s * c01;
262  a[2 * N + n] = s * c11;
263  a[3 * N + n] = s * c02;
264  a[4 * N + n] = s * c12;
265  a[5 * N + n] = s * c22;
266  }
267  }
268 
269  Matriplex<T, 1, 1, N> ReduceFixedIJ(idx_t i, idx_t j) const {
270  Matriplex<T, 1, 1, N> t;
271  for (idx_t n = 0; n < N; ++n) {
272  t[n] = constAt(n, i, j);
273  }
274  return t;
275  }
276  };
Basic3DVector & operator=(const Basic3DVector &)=default
Assignment operator.
const idx_t gSymOffsets[7][36]
Definition: MatriplexSym.h:13
d
Definition: ztail.py:151
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
double b
Definition: hdecay.h:120
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
double a
Definition: hdecay.h:121
T operator[](int i) const
tmp
align.sh
Definition: createJobs.py:716
long double T
#define ASSUME_ALIGNED(a, b)

◆ __attribute__() [2/2]

template<typename T , idx_t D1, idx_t D2, idx_t N>
class Matriplex::__attribute__ ( (aligned(32))  )

return no. of matrix rows

return no. of matrix columns

return no of elements: rows*columns

size of the whole matriplex

Definition at line 11 of file Matriplex.h.

References a, abs(), PVValHelper::add(), b, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), filterCSVwithJSON::copy, cos(), hypot(), mps_fire::i, recoMuon::in, dqmiolumiharvest::j, dqmdumpme::k, visualization-live-secondInstance_cfg::m, N, dqmiodumpmetadata::n, operator*=(), operator+=(), operator-(), operator-=(), operator/=(), operator=(), operator[](), pfClustersFromCombinedCaloHF_cfi::scale, sin(), sqr(), sqrt(), TrackRefitter_38T_cff::src, submitPVValidationJobs::t, tan(), createJobs::tmp, mitigatedMETSequence_cff::U, interactiveExample::ui, findQualityFiles::v, and geometryCSVtoXML::xx.

11  {
12  public:
13  typedef T value_type;
14 
16  static constexpr int kRows = D1;
18  static constexpr int kCols = D2;
20  static constexpr int kSize = D1 * D2;
22  static constexpr int kTotSize = N * kSize;
23 
24  T fArray[kTotSize];
25 
26  Matriplex() {}
27  Matriplex(T v) { setVal(v); }
28 
29  idx_t plexSize() const { return N; }
30 
31  void setVal(T v) {
32  for (idx_t i = 0; i < kTotSize; ++i) {
33  fArray[i] = v;
34  }
35  }
36 
37  void add(const Matriplex& v) {
38  for (idx_t i = 0; i < kTotSize; ++i) {
39  fArray[i] += v.fArray[i];
40  }
41  }
42 
43  void scale(T scale) {
44  for (idx_t i = 0; i < kTotSize; ++i) {
45  fArray[i] *= scale;
46  }
47  }
48 
49  T operator[](idx_t xx) const { return fArray[xx]; }
50  T& operator[](idx_t xx) { return fArray[xx]; }
51 
52  const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
53 
54  T& At(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
55 
56  T& operator()(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
57  const T& operator()(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
58 
59  Matriplex& operator=(T t) {
60  for (idx_t i = 0; i < kTotSize; ++i)
61  fArray[i] = t;
62  return *this;
63  }
64 
65  Matriplex& operator+=(T t) {
66  for (idx_t i = 0; i < kTotSize; ++i)
67  fArray[i] += t;
68  return *this;
69  }
70 
71  Matriplex& operator-=(T t) {
72  for (idx_t i = 0; i < kTotSize; ++i)
73  fArray[i] -= t;
74  return *this;
75  }
76 
77  Matriplex& operator*=(T t) {
78  for (idx_t i = 0; i < kTotSize; ++i)
79  fArray[i] *= t;
80  return *this;
81  }
82 
83  Matriplex& operator/=(T t) {
84  for (idx_t i = 0; i < kTotSize; ++i)
85  fArray[i] /= t;
86  return *this;
87  }
88 
89  Matriplex& operator+=(const Matriplex& a) {
90  for (idx_t i = 0; i < kTotSize; ++i)
91  fArray[i] += a.fArray[i];
92  return *this;
93  }
94 
95  Matriplex& operator-=(const Matriplex& a) {
96  for (idx_t i = 0; i < kTotSize; ++i)
97  fArray[i] -= a.fArray[i];
98  return *this;
99  }
100 
101  Matriplex& operator*=(const Matriplex& a) {
102  for (idx_t i = 0; i < kTotSize; ++i)
103  fArray[i] *= a.fArray[i];
104  return *this;
105  }
106 
107  Matriplex& operator/=(const Matriplex& a) {
108  for (idx_t i = 0; i < kTotSize; ++i)
109  fArray[i] /= a.fArray[i];
110  return *this;
111  }
112 
113  Matriplex operator-() {
114  Matriplex t;
115  for (idx_t i = 0; i < kTotSize; ++i)
116  t.fArray[i] = -fArray[i];
117  return t;
118  }
119 
120  Matriplex& abs(const Matriplex& a) {
121  for (idx_t i = 0; i < kTotSize; ++i)
122  fArray[i] = std::abs(a.fArray[i]);
123  return *this;
124  }
125  Matriplex& abs() {
126  for (idx_t i = 0; i < kTotSize; ++i)
127  fArray[i] = std::abs(fArray[i]);
128  return *this;
129  }
130 
131  Matriplex& sqrt(const Matriplex& a) {
132  for (idx_t i = 0; i < kTotSize; ++i)
133  fArray[i] = std::sqrt(a.fArray[i]);
134  return *this;
135  }
136  Matriplex& sqrt() {
137  for (idx_t i = 0; i < kTotSize; ++i)
138  fArray[i] = std::sqrt(fArray[i]);
139  return *this;
140  }
141 
142  Matriplex& sqr(const Matriplex& a) {
143  for (idx_t i = 0; i < kTotSize; ++i)
144  fArray[i] = a.fArray[i] * a.fArray[i];
145  return *this;
146  }
147  Matriplex& sqr() {
148  for (idx_t i = 0; i < kTotSize; ++i)
149  fArray[i] = fArray[i] * fArray[i];
150  return *this;
151  }
152 
153  Matriplex& hypot(const Matriplex& a, const Matriplex& b) {
154  for (idx_t i = 0; i < kTotSize; ++i) {
155  fArray[i] = a.fArray[i] * a.fArray[i] + b.fArray[i] * b.fArray[i];
156  }
157  return sqrt();
158  }
159 
160  Matriplex& sin(const Matriplex& a) {
161  for (idx_t i = 0; i < kTotSize; ++i)
162  fArray[i] = std::sin(a.fArray[i]);
163  return *this;
164  }
165  Matriplex& sin() {
166  for (idx_t i = 0; i < kTotSize; ++i)
167  fArray[i] = std::sin(fArray[i]);
168  return *this;
169  }
170 
171  Matriplex& cos(const Matriplex& a) {
172  for (idx_t i = 0; i < kTotSize; ++i)
173  fArray[i] = std::cos(a.fArray[i]);
174  return *this;
175  }
176  Matriplex& cos() {
177  for (idx_t i = 0; i < kTotSize; ++i)
178  fArray[i] = std::cos(fArray[i]);
179  return *this;
180  }
181 
182  Matriplex& tan(const Matriplex& a) {
183  for (idx_t i = 0; i < kTotSize; ++i)
184  fArray[i] = std::tan(a.fArray[i]);
185  return *this;
186  }
187  Matriplex& tan() {
188  for (idx_t i = 0; i < kTotSize; ++i)
189  fArray[i] = std::tan(fArray[i]);
190  return *this;
191  }
192 
193  //---------------------------------------------------------
194 
195  void copySlot(idx_t n, const Matriplex& m) {
196  for (idx_t i = n; i < kTotSize; i += N) {
197  fArray[i] = m.fArray[i];
198  }
199  }
200 
201  void copyIn(idx_t n, const T* arr) {
202  for (idx_t i = n; i < kTotSize; i += N) {
203  fArray[i] = *(arr++);
204  }
205  }
206 
207  void copyIn(idx_t n, const Matriplex& m, idx_t in) {
208  for (idx_t i = n; i < kTotSize; i += N, in += N) {
209  fArray[i] = m[in];
210  }
211  }
212 
213  void copy(idx_t n, idx_t in) {
214  for (idx_t i = n; i < kTotSize; i += N, in += N) {
215  fArray[i] = fArray[in];
216  }
217  }
218 
219 #if defined(AVX512_INTRINSICS)
220 
221  template <typename U>
222  void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
223  //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
224 
225  const __m512 src = {0};
226  const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
227 
228  for (int i = 0; i < kSize; ++i, ++arr) {
229  //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
230 
231  __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
232  _mm512_mask_store_ps(&fArray[i * N], k, reg);
233  }
234  }
235 
236  // Experimental methods, slurpIn() seems to be at least as fast.
237  // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
238  void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
239  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
240 
241  for (int i = 0; i < N; ++i) {
242  __m512 reg = _mm512_load_ps(arr + vi[i]);
243  _mm512_store_ps((void*)(tmp + 64 * i), reg);
244  }
245 
246  for (int i = 0; i < kSize; ++i) {
247  __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
248  _mm512_store_ps(&fArray[i * N], reg);
249  }
250  }
251 
252  void Contaginate(const char* arr, int vi[N], const char* tmp) {
253  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
254 
255  for (int i = 0; i < N; ++i) {
256  __m512 reg = _mm512_load_ps(arr + vi[i]);
257  _mm512_store_ps((void*)(tmp + 64 * i), reg);
258  }
259  }
260 
261  void Plexify(const char* tmp, __m512i& ui) {
262  for (int i = 0; i < kSize; ++i) {
263  __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
264  _mm512_store_ps(&fArray[i * N], reg);
265  }
266  }
267 
268 #elif defined(AVX2_INTRINSICS)
269 
270  template <typename U>
271  void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
272  // Casts to float* needed to "support" also T=HitOnTrack.
273  // Note that sizeof(float) == sizeof(HitOnTrack) == 4.
274 
275  const __m256 src = {0};
276 
277  __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
278  __m256i k_sel = _mm256_set1_epi32(N_proc);
279  __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
280 
281  k = k_master;
282  for (int i = 0; i < kSize; ++i, ++arr) {
283  __m256 reg = _mm256_mask_i32gather_ps(src, (float*)arr, vi, (__m256)k, sizeof(U));
284  // Restore mask (docs say gather clears it but it doesn't seem to).
285  k = k_master;
286  _mm256_maskstore_ps((float*)&fArray[i * N], k, reg);
287  }
288  }
289 
290 #else
291 
292  void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
293  // Separate N_proc == N case (gains about 7% in fit test).
294  if (N_proc == N) {
295  for (int i = 0; i < kSize; ++i) {
296  for (int j = 0; j < N; ++j) {
297  fArray[i * N + j] = *(arr + i + vi[j]);
298  }
299  }
300  } else {
301  for (int i = 0; i < kSize; ++i) {
302  for (int j = 0; j < N_proc; ++j) {
303  fArray[i * N + j] = *(arr + i + vi[j]);
304  }
305  }
306  }
307  }
308 
309 #endif
310 
311  void copyOut(idx_t n, T* arr) const {
312  for (idx_t i = n; i < kTotSize; i += N) {
313  *(arr++) = fArray[i];
314  }
315  }
316 
317  Matriplex<T, 1, 1, N> ReduceFixedIJ(idx_t i, idx_t j) const {
318  Matriplex<T, 1, 1, N> t;
319  for (idx_t n = 0; n < N; ++n) {
320  t[n] = constAt(n, i, j);
321  }
322  return t;
323  }
324  };
Basic3DVector & operator*=(T t)
Scaling by a scalar value (multiplication)
Divides< B, C > D2
Definition: Factorize.h:137
Basic3DVector & operator=(const Basic3DVector &)=default
Assignment operator.
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:436
MPlex< T, D1, D2, N > sin(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:442
Basic3DVector & operator-=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > sqr(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:430
Divides< A, C > D1
Definition: Factorize.h:136
MPlex< T, D1, D2, N > abs(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:418
MPlex< T, D1, D2, N > operator-(T a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:397
#define N
Definition: blowfish.cc:9
Basic3DVector & operator/=(T t)
Scaling by a scalar value (division)
MPlex< T, D1, D2, N > tan(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:462
double b
Definition: hdecay.h:120
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
double a
Definition: hdecay.h:121
T operator[](int i) const
tmp
align.sh
Definition: createJobs.py:716
long double T
Basic3DVector & operator+=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > cos(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:448
MPlex< T, D1, D2, N > sqrt(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:424

◆ abs()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::abs ( const MPlex< T, D1, D2, N > &  a)

Definition at line 418 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__().

418  {
419  MPlex<T, D1, D2, N> t;
420  return t.abs(a);
421  }
double a
Definition: hdecay.h:121

◆ align_check()

void Matriplex::align_check ( const char *  pref,
void *  adr 
)

Definition at line 4 of file MatriplexCommon.cc.

Referenced by mkfit::MkFitter::checkAlignment().

4  {
5  printf("%s 0x%llx - modulo 64 = %lld\n", pref, (long long unsigned)adr, (long long)adr % 64);
6  }

◆ aligned_alloc64()

void* Matriplex::aligned_alloc64 ( std::size_t  size)
inline

Definition at line 13 of file Memory.h.

References aligned_alloc(), and round_up_align64().

Referenced by mkfit::Pool< mkfit::MkFitter >::create(), and Matriplex::MatriplexVector< MP >::MatriplexVector().

size
Write out results.
void * aligned_alloc(size_t alignment, size_t size) noexcept
constexpr std::size_t round_up_align64(std::size_t size)
Definition: Memory.h:8

◆ cos()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::cos ( const MPlex< T, D1, D2, N > &  a)

Definition at line 448 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), and sincos().

448  {
449  MPlex<T, D1, D2, N> t;
450  return t.cos(a);
451  }
double a
Definition: hdecay.h:121

◆ hypot()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::hypot ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 436 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

Referenced by __attribute__(), mkfit::MkFinder::addBestHit(), RegressionHelper::applyCombinationRegression(), objects.METAnalyzer.METAnalyzer::applyDeltaMet(), mkfit::MkFinder::bkFitFitTracks(), mkfit::MkFinder::bkFitFitTracksBH(), PFEGammaAlgo::calculateEleMVA(), PhotonEnergyCalibratorRun2::calibrate(), ElectronEnergyCalibratorRun2::calibrate(), mkfit::TrackBase::canReachRadius(), MuonGEMBaseHarvestor::computeEfficiency(), DQMGenericClient::computeEfficiency(), JetReCalibrator.Type1METCorrector::correct(), mkfit::TrackBase::d0BeamSpot(), L1JetRecoTreeProducer::doPFMetNoMu(), L1JetRecoTreeProducer::doPUPPIMetNoMu(), HLTRegionalEcalResonanceFilter::doSelection(), pat::PATMuonProducer::embedHighLevel(), pat::PATElectronProducer::embedHighLevel(), trklet::TrackletEventProcessor::event(), Phase2TrackerMonitorDigi::fillITPixelDigiHistos(), LHETablesProducer::fillLHEObjectTable(), Phase2TrackerMonitorDigi::fillOTDigiHistos(), Phase2TrackerValidateDigi::fillSimHitInfo(), mkfit::MkFinder::findCandidates(), GEMEfficiencyAnalyzer::findCSCSegmentCosmics(), GenParticles2HepMCConverter::FourVector(), Point::GetSigmaDeltaMu(), mkfit::kalmanOperation(), mkfit::kalmanPropagateAndComputeChi2(), mkfit::kalmanPropagateAndUpdate(), objects.METAnalyzer::makeGenTkMet(), mkfit::TrackBase::maxReachRadius(), LowPtElectronModifier::modifyObject(), l1tpf::ParametricResolution::operator()(), reco::parser::hypot_f::operator()(), mkfit::MkFitter::printPt(), DeepMETProducer::produce(), DeepMETSonicProducer::produce(), L1TPFMetNoMuProducer::produce(), PseudoTopProducer::produce(), PATTracksToPackedCandidates::produce(), EvtPlaneProducer::produce(), L1FPGATrackProducer::produce(), mkfit::mini_propagators::InitialState::propagate_to_r(), mkfit::mini_propagators::InitialStatePlex::propagate_to_r(), mkfit::propagateHelixToPlaneMPlex(), mkfit::propagateHelixToZMPlex(), mkfit::MkBase::propagateTracksToHitR(), mkfit::MkBase::propagateTracksToPCAZ(), trklet::L1TStub::r(), mkfit::TrackBase::rAtZ(), 
EcalUncalibRecHitWorkerMultiFit::run(), pf2pat::IPCutPFCandidateSelectorDefinition::select(), MultiTrackSelector::select(), HIMultiTrackSelector::select(), mkfit::MkFinder::selectHitIndices(), mkfit::MkFinder::selectHitIndicesV2(), pat::LeptonUpdater< T >::setDZ(), EGEtScaleSysModifier::setEcalEnergy(), ElectronEnergyCalibrator::setEcalEnergy(), PhotonEnergyCalibrator::setEnergyAndSystVarations(), JetReCalibrator::setFakeRawMETOnOldMiniAODs(), BJetId::setNNVectorVar(), pat::MET::shiftedP4(), pat::MET::shiftedP4_74x(), XHistogram::splitSegment(), DD4hep_XHistogram::splitSegment(), objects.METAnalyzer::sumXY(), reco::ForwardProton::t(), and mkfit::TrackBase::zAtR().

436  {
437  MPlex<T, D1, D2, N> t;
438  return t.hypot(a, b);
439  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ invertCholesky() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholesky ( MPlexVec< MPlex< T, D, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 126 of file MatriplexVector.h.

References A, mps_fire::i, invertCholesky(), and np.

126  {
127  const int np = n_to_process ? n_to_process : A.size();
128 
129  for (int i = 0; i < np; ++i) {
130  invertCholesky(A[i]);
131  }
132  }
int np
Definition: AMPTWrapper.h:43
void invertCholesky(MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCholesky() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholesky ( MPlex< T, D, D, N > &  A)

◆ invertCholeskySym() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholeskySym ( MPlexVec< MPlexSym< T, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 144 of file MatriplexVector.h.

References A, mps_fire::i, invertCholeskySym(), and np.

144  {
145  const int np = n_to_process ? n_to_process : A.size();
146 
147  for (int i = 0; i < np; ++i) {
148  invertCholeskySym(A[i]);
149  }
150  }
void invertCholeskySym(MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
int np
Definition: AMPTWrapper.h:43
Definition: APVGainStruct.h:7

◆ invertCholeskySym() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholeskySym ( MPlexSym< T, D, N > &  A)

◆ invertCramer() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramer ( MPlexVec< MPlex< T, D, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 117 of file MatriplexVector.h.

References A, mps_fire::i, invertCramer(), and np.

117  {
118  const int np = n_to_process ? n_to_process : A.size();
119 
120  for (int i = 0; i < np; ++i) {
121  invertCramer(A[i]);
122  }
123  }
int np
Definition: AMPTWrapper.h:43
void invertCramer(MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCramer() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramer ( MPlex< T, D, D, N > &  A,
double *  determ = nullptr 
)

◆ invertCramerSym() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramerSym ( MPlexVec< MPlexSym< T, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 135 of file MatriplexVector.h.

References A, mps_fire::i, invertCramerSym(), and np.

135  {
136  const int np = n_to_process ? n_to_process : A.size();
137 
138  for (int i = 0; i < np; ++i) {
139  invertCramerSym(A[i]);
140  }
141  }
int np
Definition: AMPTWrapper.h:43
void invertCramerSym(MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCramerSym() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramerSym ( MPlexSym< T, D, N > &  A,
double *  determ = nullptr 
)

◆ max()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::max ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 488 of file Matriplex.h.

References a, b, mps_fire::i, and submitPVValidationJobs::t.

Referenced by min_max().

488  {
489  MPlex<T, D1, D2, N> t;
490  for (idx_t i = 0; i < a.kTotSize; ++i) {
491  t.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
492  }
493  return t;
494  }
MPlex< T, D1, D2, N > max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:488
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ min()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::min ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 479 of file Matriplex.h.

References a, b, mps_fire::i, and submitPVValidationJobs::t.

Referenced by min_max().

479  {
480  MPlex<T, D1, D2, N> t;
481  for (idx_t i = 0; i < a.kTotSize; ++i) {
482  t.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
483  }
484  return t;
485  }
MPlex< T, D1, D2, N > min(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:479
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ min_max()

template<typename T , idx_t D1, idx_t D2, idx_t N>
void Matriplex::min_max ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b,
MPlex< T, D1, D2, N > &  min,
MPlex< T, D1, D2, N > &  max 
)

Definition at line 468 of file Matriplex.h.

References a, b, mps_fire::i, max(), and min().

Referenced by mkfit::MkFinder::selectHitIndicesV2().

471  {
472  for (idx_t i = 0; i < a.kTotSize; ++i) {
473  min.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
474  max.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
475  }
476  }
MPlex< T, D1, D2, N > min(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:479
MPlex< T, D1, D2, N > max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:488
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ multiply() [1/4]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiply ( const MPlexVec< MPlex< T, D1, D2, N >> &  A,
const MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 53 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

56  {
57  assert(A.size() == B.size());
58  assert(A.size() == C.size());
59 
60  const int np = n_to_process ? n_to_process : A.size();
61 
62  for (int i = 0; i < np; ++i) {
63  multiply(A[i], B[i], C[i]);
64  }
65  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiply() [2/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlexVec< MPlexSym< T, D, N >> &  A,
const MPlexVec< MPlexSym< T, D, N >> &  B,
MPlexVec< MPlex< T, D, D, N >> &  C,
int  n_to_process = 0 
)

Definition at line 100 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

103  {
104  assert(A.size() == B.size());
105  assert(A.size() == C.size());
106 
107  const int np = n_to_process ? n_to_process : A.size();
108 
109  for (int i = 0; i < np; ++i) {
110  multiply(A[i], B[i], C[i]);
111  }
112  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiply() [3/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlexSym< T, D, N > &  A,
const MPlexSym< T, D, N > &  B,
MPlex< T, D, D, N > &  C 
)

Definition at line 347 of file MatriplexSym.h.

References A, B, correctionTermsCaloMet_cff::C, and Matriplex::SymMultiplyCls< T, D, N >::multiply().

347  {
348  SymMultiplyCls<T, D, N>::multiply(A, B, C);
349  }
Definition: APVGainStruct.h:7
void multiply(const MPlexSym< T, D, N > &A, const MPlexSym< T, D, N > &B, MPlex< T, D, D, N > &C)
Definition: MatriplexSym.h:347
Definition: APVGainStruct.h:7

◆ multiply() [4/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlex< T, D, D, N > &  A,
const MPlex< T, D, D, N > &  B,
MPlex< T, D, D, N > &  C 
)

Definition at line 646 of file Matriplex.h.

References A, B, correctionTermsCaloMet_cff::C, Matriplex::MultiplyCls< T, D, N >::multiply(), and N.

Referenced by multiply(), multiply3in(), and AlignmentExtendedCorrelationsEntry::operator*=().

646  {
647 #ifdef DEBUG
648  printf("Multipl %d %d\n", D, N);
649 #endif
650 
651  MultiplyCls<T, D, N>::multiply(A, B, C);
652  }
Definition: APVGainStruct.h:7
void multiply(const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
Definition: Matriplex.h:646
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
Definition: APVGainStruct.h:7

◆ multiply3in()

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiply3in ( MPlexVec< MPlex< T, D1, D2, N >> &  A,
MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 83 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

86  {
87  assert(A.size() == B.size());
88  assert(A.size() == C.size());
89 
90  const int np = n_to_process ? n_to_process : A.size();
91 
92  for (int i = 0; i < np; ++i) {
93  multiply(A[i], B[i], C[i]);
94  multiply(B[i], C[i], A[i]);
95  multiply(C[i], A[i], B[i]);
96  }
97  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiplyGeneral() [1/2]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiplyGeneral ( const MPlexVec< MPlex< T, D1, D2, N >> &  A,
const MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 68 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiplyGeneral(), and np.

71  {
72  assert(A.size() == B.size());
73  assert(A.size() == C.size());
74 
75  const int np = n_to_process ? n_to_process : A.size();
76 
77  for (int i = 0; i < np; ++i) {
78  multiplyGeneral(A[i], B[i], C[i]);
79  }
80  }
Definition: APVGainStruct.h:7
void multiplyGeneral(const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
Definition: APVGainStruct.h:7

◆ multiplyGeneral() [2/2]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiplyGeneral ( const MPlex< T, D1, D2, N > &  A,
const MPlex< T, D2, D3, N > &  B,
MPlex< T, D1, D3, N > &  C 
)

Definition at line 501 of file Matriplex.h.

References A, B, correctionTermsCaloMet_cff::C, trklet::D3, mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, N, and dqmiodumpmetadata::n.

Referenced by multiplyGeneral().

501  {
502  for (idx_t i = 0; i < D1; ++i) {
503  for (idx_t j = 0; j < D3; ++j) {
504  const idx_t ijo = N * (i * D3 + j);
505 
506 #pragma omp simd
507  for (idx_t n = 0; n < N; ++n) {
508  C.fArray[ijo + n] = 0;
509  }
510 
511  for (idx_t k = 0; k < D2; ++k) {
512  const idx_t iko = N * (i * D2 + k);
513  const idx_t kjo = N * (k * D3 + j);
514 
515 #pragma omp simd
516  for (idx_t n = 0; n < N; ++n) {
517  C.fArray[ijo + n] += A.fArray[iko + n] * B.fArray[kjo + n];
518  }
519  }
520  }
521  }
522  }
Divides< B, C > D2
Definition: Factorize.h:137
Definition: APVGainStruct.h:7
Divides< A, C > D1
Definition: Factorize.h:136
#define N
Definition: blowfish.cc:9
Definition: APVGainStruct.h:7

◆ operator*() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 348 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

348  {
349  MPlex<T, D1, D2, N> t = a;
350  t *= b;
351  return t;
352  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator*() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 376 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

376  {
377  MPlex<T, D1, D2, N> t = a;
378  t *= b;
379  return t;
380  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator*() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 404 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

404  {
405  MPlex<T, D1, D2, N> t = a;
406  t *= b;
407  return t;
408  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 334 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

334  {
335  MPlex<T, D1, D2, N> t = a;
336  t += b;
337  return t;
338  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 362 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

362  {
363  MPlex<T, D1, D2, N> t = a;
364  t += b;
365  return t;
366  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 390 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

390  {
391  MPlex<T, D1, D2, N> t = a;
392  t += b;
393  return t;
394  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 341 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

Referenced by __attribute__().

341  {
342  MPlex<T, D1, D2, N> t = a;
343  t -= b;
344  return t;
345  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 369 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

369  {
370  MPlex<T, D1, D2, N> t = a;
371  t -= b;
372  return t;
373  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 397 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

397  {
398  MPlex<T, D1, D2, N> t = a;
399  t -= b;
400  return t;
401  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 355 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

355  {
356  MPlex<T, D1, D2, N> t = a;
357  t /= b;
358  return t;
359  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 383 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

383  {
384  MPlex<T, D1, D2, N> t = a;
385  t /= b;
386  return t;
387  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 411 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

411  {
412  MPlex<T, D1, D2, N> t = a;
413  t /= b;
414  return t;
415  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ round_up_align64()

constexpr std::size_t Matriplex::round_up_align64 ( std::size_t  size)

◆ sin()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sin ( const MPlex< T, D1, D2, N > &  a)

Definition at line 442 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), and sincos().

442  {
443  MPlex<T, D1, D2, N> t;
444  return t.sin(a);
445  }
double a
Definition: hdecay.h:121

◆ sincos()

template<typename T , idx_t D1, idx_t D2, idx_t N>
void Matriplex::sincos ( const MPlex< T, D1, D2, N > &  a,
MPlex< T, D1, D2, N > &  s,
MPlex< T, D1, D2, N > &  c 
)

Definition at line 454 of file Matriplex.h.

References a, HltBtagPostValidation_cff::c, cos(), mps_fire::i, alignCSCRings::s, and sin().

454  {
455  for (idx_t i = 0; i < a.kTotSize; ++i) {
456  s.fArray[i] = std::sin(a.fArray[i]);
457  c.fArray[i] = std::cos(a.fArray[i]);
458  }
459  }
MPlex< T, D1, D2, N > sin(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:442
double a
Definition: hdecay.h:121
MPlex< T, D1, D2, N > cos(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:448

◆ sqr()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sqr ( const MPlex< T, D1, D2, N > &  a)

Definition at line 430 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__().

430  {
431  MPlex<T, D1, D2, N> t;
432  return t.sqr(a);
433  }
double a
Definition: hdecay.h:121

◆ sqrt()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sqrt ( const MPlex< T, D1, D2, N > &  a)

Definition at line 424 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), Matriplex::CholeskyInverter< T, 3, N >::invert(), and mkfit::MkFinder::selectHitIndicesV2().

424  {
425  MPlex<T, D1, D2, N> t;
426  return t.sqrt(a);
427  }
double a
Definition: hdecay.h:121

◆ tan()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::tan ( const MPlex< T, D1, D2, N > &  a)

Definition at line 462 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__().

462  {
463  MPlex<T, D1, D2, N> t;
464  return t.tan(a);
465  }
double a
Definition: hdecay.h:121

Variable Documentation

◆ gSymOffsets

const idx_t Matriplex::gSymOffsets[7][36]
Initial value:
= {{},
{},
{0, 1, 1, 2},
{0, 1, 3, 1, 2, 4, 3, 4, 5},
{},
{},
{0, 1, 3, 6, 10, 15, 1, 2, 4, 7, 11, 16, 3, 4, 5, 8, 12, 17,
6, 7, 8, 9, 13, 18, 10, 11, 12, 13, 14, 19, 15, 16, 17, 18, 19, 20}}

Definition at line 13 of file MatriplexSym.h.

Referenced by __attribute__().