CMS 3D CMS Logo

Classes | Typedefs | Functions | Variables
Matriplex Namespace Reference

Classes

struct  CholeskyInverter
 
struct  CholeskyInverter< T, 3, N >
 
struct  CholeskyInverterSym
 
struct  CholeskyInverterSym< T, 3, N >
 
struct  CramerInverter
 
struct  CramerInverter< T, 2, N >
 
struct  CramerInverter< T, 3, N >
 
struct  CramerInverterSym
 
struct  CramerInverterSym< T, 2, N >
 
struct  CramerInverterSym< T, 3, N >
 
class  MatriplexVector
 
struct  MultiplyCls
 
struct  MultiplyCls< T, 3, N >
 
struct  MultiplyCls< T, 6, N >
 
struct  SymMultiplyCls
 
struct  SymMultiplyCls< T, 3, N >
 
struct  SymMultiplyCls< T, 6, N >
 

Typedefs

typedef int idx_t
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
using MPlex = Matriplex< T, D1, D2, N >
 
template<typename T , idx_t D, idx_t N>
using MPlexSym = MatriplexSym< T, D, N >
 
template<class MP >
using MPlexVec = MatriplexVector< MP >
 

Functions

template<typename T , idx_t D, idx_t N>
class __attribute__ ((aligned(32))) MatriplexSym
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
class __attribute__ ((aligned(32))) Matriplex
 
void align_check (const char *pref, void *adr)
 
void * aligned_alloc64 (std::size_t size)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > cos (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > hypot (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D, idx_t N>
void invertCholesky (MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCholesky (MPlex< T, D, D, N > &A)
 
template<typename T , idx_t D, idx_t N>
void invertCholeskySym (MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCholeskySym (MPlexSym< T, D, N > &A)
 
template<typename T , idx_t D, idx_t N>
void invertCramer (MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCramer (MPlex< T, D, D, N > &A, double *determ=nullptr)
 
template<typename T , idx_t D, idx_t N>
void invertCramerSym (MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void invertCramerSym (MPlexSym< T, D, N > &A, double *determ=nullptr)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > max (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > min (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
void min_max (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b, MPlex< T, D1, D2, N > &min, MPlex< T, D1, D2, N > &max)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiply (const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlexSym< T, D, N > &A, const MPlexSym< T, D, N > &B, MPlex< T, D, D, N > &C)
 
template<typename T , idx_t D, idx_t N>
void multiply (const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiply3in (MPlexVec< MPlex< T, D1, D2, N >> &A, MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiplyGeneral (const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
 
template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void multiplyGeneral (const MPlex< T, D1, D2, N > &A, const MPlex< T, D2, D3, N > &B, MPlex< T, D1, D3, N > &C)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator* (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator+ (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator- (T a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (const MPlex< T, D1, D2, N > &a, T b)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > operator/ (T a, const MPlex< T, D1, D2, N > &b)
 
constexpr std::size_t round_up_align64 (std::size_t size)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sin (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
void sincos (const MPlex< T, D1, D2, N > &a, MPlex< T, D1, D2, N > &s, MPlex< T, D1, D2, N > &c)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sqr (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > sqrt (const MPlex< T, D1, D2, N > &a)
 
template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex< T, D1, D2, N > tan (const MPlex< T, D1, D2, N > &a)
 

Variables

const idx_t gSymOffsets [7][36]
 

Typedef Documentation

◆ idx_t

typedef int Matriplex::idx_t

Definition at line 98 of file MatriplexCommon.h.

◆ MPlex

template<typename T , idx_t D1, idx_t D2, idx_t N>
using Matriplex::MPlex = Matriplex<T, D1, D2, N>

Definition at line 314 of file Matriplex.h.

◆ MPlexSym

template<typename T , idx_t D, idx_t N>
using Matriplex::MPlexSym = MatriplexSym<T, D, N>

Definition at line 269 of file MatriplexSym.h.

◆ MPlexVec

template<class MP >
using Matriplex::MPlexVec = MatriplexVector<MP>

Definition at line 48 of file MatriplexVector.h.

Function Documentation

◆ __attribute__() [1/2]

template<typename T , idx_t D, idx_t N>
class Matriplex::__attribute__ ( (aligned(32))  )

no. of matrix rows

no. of matrix columns

no. of elements: lower triangle

size of the whole matriplex

Definition at line 25 of file MatriplexSym.h.

References a, PVValHelper::add(), ASSUME_ALIGNED, b, filterCSVwithJSON::copy, ztail::d, gSymOffsets, mps_fire::i, recoMuon::in, dqmiolumiharvest::j, dqmdumpme::k, visualization-live-secondInstance_cfg::m, N, dqmiodumpmetadata::n, hgchebackDigitizer_cfi::noise, unpackBuffers-CaloStage1::offsets, operator[](), AlCaHLTBitMon_ParallelJobs::p, alignCSCRings::s, pfClustersFromCombinedCaloHF_cfi::scale, TrackRefitter_38T_cff::src, electronEcalRecHitIsolationLcone_cfi::subtract, createJobs::tmp, mitigatedMETSequence_cff::U, interactiveExample::ui, findQualityFiles::v, and geometryCSVtoXML::xx.

25  {
26  public:
27  typedef T value_type;
28 
30  static constexpr int kRows = D;
32  static constexpr int kCols = D;
34  static constexpr int kSize = (D + 1) * D / 2;
36  static constexpr int kTotSize = N * kSize;
37 
38  T fArray[kTotSize];
39 
40  MatriplexSym() {}
41  MatriplexSym(T v) { setVal(v); }
42 
43  idx_t plexSize() const { return N; }
44 
45  void setVal(T v) {
46  for (idx_t i = 0; i < kTotSize; ++i) {
47  fArray[i] = v;
48  }
49  }
50 
51  void add(const MatriplexSym& v) {
52  for (idx_t i = 0; i < kTotSize; ++i) {
53  fArray[i] += v.fArray[i];
54  }
55  }
56 
57  void scale(T scale) {
58  for (idx_t i = 0; i < kTotSize; ++i) {
59  fArray[i] *= scale;
60  }
61  }
62 
63  T operator[](idx_t xx) const { return fArray[xx]; }
64  T& operator[](idx_t xx) { return fArray[xx]; }
65 
66  const idx_t* offsets() const { return gSymOffsets[D]; }
67  idx_t off(idx_t i) const { return gSymOffsets[D][i]; }
68 
69  const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[off(i * D + j) * N + n]; }
70 
71  T& At(idx_t n, idx_t i, idx_t j) { return fArray[off(i * D + j) * N + n]; }
72 
73  T& operator()(idx_t n, idx_t i, idx_t j) { return At(n, i, j); }
74  const T& operator()(idx_t n, idx_t i, idx_t j) const { return constAt(n, i, j); }
75 
76  MatriplexSym& operator=(const MatriplexSym& m) {
77  memcpy(fArray, m.fArray, sizeof(T) * kTotSize);
78  return *this;
79  }
80 
81  void copySlot(idx_t n, const MatriplexSym& m) {
82  for (idx_t i = n; i < kTotSize; i += N) {
83  fArray[i] = m.fArray[i];
84  }
85  }
86 
87  void copyIn(idx_t n, const T* arr) {
88  for (idx_t i = n; i < kTotSize; i += N) {
89  fArray[i] = *(arr++);
90  }
91  }
92 
93  void copyIn(idx_t n, const MatriplexSym& m, idx_t in) {
94  for (idx_t i = n; i < kTotSize; i += N, in += N) {
95  fArray[i] = m[in];
96  }
97  }
98 
99  void copy(idx_t n, idx_t in) {
100  for (idx_t i = n; i < kTotSize; i += N, in += N) {
101  fArray[i] = fArray[in];
102  }
103  }
104 
105 #if defined(AVX512_INTRINSICS)
106 
107  template <typename U>
108  void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
109  //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
110 
111  const __m512 src = {0};
112  const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
113 
114  for (int i = 0; i < kSize; ++i, ++arr) {
115  //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
116 
117  __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
118  _mm512_mask_store_ps(&fArray[i * N], k, reg);
119  }
120  }
121 
122  // Experimental methods, slurpIn() seems to be at least as fast.
123  // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
124 
125  void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
126  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
127 
128  for (int i = 0; i < N; ++i) {
129  __m512 reg = _mm512_load_ps(arr + vi[i]);
130  _mm512_store_ps((void*)(tmp + 64 * i), reg);
131  }
132 
133  for (int i = 0; i < kSize; ++i) {
134  __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
135  _mm512_store_ps(&fArray[i * N], reg);
136  }
137  }
138 
139  void Contaginate(const char* arr, int vi[N], const char* tmp) {
140  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
141 
142  for (int i = 0; i < N; ++i) {
143  __m512 reg = _mm512_load_ps(arr + vi[i]);
144  _mm512_store_ps((void*)(tmp + 64 * i), reg);
145  }
146  }
147 
148  void Plexify(const char* tmp, __m512i& ui) {
149  for (int i = 0; i < kSize; ++i) {
150  __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
151  _mm512_store_ps(&fArray[i * N], reg);
152  }
153  }
154 
155 #elif defined(AVX2_INTRINSICS)
156 
157  template <typename U>
158  void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
159  const __m256 src = {0};
160 
161  __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
162  __m256i k_sel = _mm256_set1_epi32(N_proc);
163  __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
164 
165  k = k_master;
166  for (int i = 0; i < kSize; ++i, ++arr) {
167  __m256 reg = _mm256_mask_i32gather_ps(src, arr, vi, (__m256)k, sizeof(U));
168  // Restore mask (docs say gather clears it but it doesn't seem to).
169  k = k_master;
170  _mm256_maskstore_ps(&fArray[i * N], k, reg);
171  }
172  }
173 
174 #else
175 
176  void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
177  // Separate N_proc == N case (gains about 7% in fit test).
178  if (N_proc == N) {
179  for (int i = 0; i < kSize; ++i) {
180  for (int j = 0; j < N; ++j) {
181  fArray[i * N + j] = *(arr + i + vi[j]);
182  }
183  }
184  } else {
185  for (int i = 0; i < kSize; ++i) {
186  for (int j = 0; j < N_proc; ++j) {
187  fArray[i * N + j] = *(arr + i + vi[j]);
188  }
189  }
190  }
191  }
192 
193 #endif
194 
195  void copyOut(idx_t n, T* arr) const {
196  for (idx_t i = n; i < kTotSize; i += N) {
197  *(arr++) = fArray[i];
198  }
199  }
200 
201  void setDiagonal3x3(idx_t n, T d) {
202  T* p = fArray + n;
203 
204  p[0 * N] = d;
205  p[1 * N] = 0;
206  p[2 * N] = d;
207  p[3 * N] = 0;
208  p[4 * N] = 0;
209  p[5 * N] = d;
210  }
211 
212  MatriplexSym& subtract(const MatriplexSym& a, const MatriplexSym& b) {
213  // Does *this = a - b;
214 
215 #pragma omp simd
216  for (idx_t i = 0; i < kTotSize; ++i) {
217  fArray[i] = a.fArray[i] - b.fArray[i];
218  }
219 
220  return *this;
221  }
222 
223  // ==================================================================
224  // Operations specific to Kalman fit in 6 parameter space
225  // ==================================================================
226 
227  void addNoiseIntoUpperLeft3x3(T noise) {
228  T* p = fArray;
229  ASSUME_ALIGNED(p, 64);
230 
231 #pragma omp simd
232  for (idx_t n = 0; n < N; ++n) {
233  p[0 * N + n] += noise;
234  p[2 * N + n] += noise;
235  p[5 * N + n] += noise;
236  }
237  }
238 
239  void invertUpperLeft3x3() {
240  typedef T TT;
241 
242  T* a = fArray;
243  ASSUME_ALIGNED(a, 64);
244 
245 #pragma omp simd
246  for (idx_t n = 0; n < N; ++n) {
247  const TT c00 = a[2 * N + n] * a[5 * N + n] - a[4 * N + n] * a[4 * N + n];
248  const TT c01 = a[4 * N + n] * a[3 * N + n] - a[1 * N + n] * a[5 * N + n];
249  const TT c02 = a[1 * N + n] * a[4 * N + n] - a[2 * N + n] * a[3 * N + n];
250  const TT c11 = a[5 * N + n] * a[0 * N + n] - a[3 * N + n] * a[3 * N + n];
251  const TT c12 = a[3 * N + n] * a[1 * N + n] - a[4 * N + n] * a[0 * N + n];
252  const TT c22 = a[0 * N + n] * a[2 * N + n] - a[1 * N + n] * a[1 * N + n];
253 
254  // Force determinant calculation in double precision.
255  const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[3 * N + n] * c02;
256  const TT s = TT(1) / det;
257 
258  a[0 * N + n] = s * c00;
259  a[1 * N + n] = s * c01;
260  a[2 * N + n] = s * c11;
261  a[3 * N + n] = s * c02;
262  a[4 * N + n] = s * c12;
263  a[5 * N + n] = s * c22;
264  }
265  }
266  };
const idx_t gSymOffsets[7][36]
Definition: MatriplexSym.h:13
d
Definition: ztail.py:151
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
double b
Definition: hdecay.h:120
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
double a
Definition: hdecay.h:121
T operator[](int i) const
tmp
align.sh
Definition: createJobs.py:716
long double T
#define ASSUME_ALIGNED(a, b)

◆ __attribute__() [2/2]

template<typename T , idx_t D1, idx_t D2, idx_t N>
class Matriplex::__attribute__ ( (aligned(32))  )

return no. of matrix rows

return no. of matrix columns

return no. of elements: rows*columns

size of the whole matriplex

Definition at line 11 of file Matriplex.h.

References a, PVValHelper::add(), b, filterCSVwithJSON::copy, cos(), hypot(), mps_fire::i, recoMuon::in, dqmiolumiharvest::j, dqmdumpme::k, visualization-live-secondInstance_cfg::m, N, dqmiodumpmetadata::n, operator*=(), operator+=(), operator-=(), operator/=(), operator[](), pfClustersFromCombinedCaloHF_cfi::scale, sin(), sqr(), sqrt(), TrackRefitter_38T_cff::src, submitPVValidationJobs::t, tan(), createJobs::tmp, mitigatedMETSequence_cff::U, interactiveExample::ui, findQualityFiles::v, and geometryCSVtoXML::xx.

11  {
12  public:
13  typedef T value_type;
14 
16  static constexpr int kRows = D1;
18  static constexpr int kCols = D2;
20  static constexpr int kSize = D1 * D2;
22  static constexpr int kTotSize = N * kSize;
23 
24  T fArray[kTotSize];
25 
26  Matriplex() {}
27  Matriplex(T v) { setVal(v); }
28 
29  idx_t plexSize() const { return N; }
30 
31  void setVal(T v) {
32  for (idx_t i = 0; i < kTotSize; ++i) {
33  fArray[i] = v;
34  }
35  }
36 
37  void add(const Matriplex& v) {
38  for (idx_t i = 0; i < kTotSize; ++i) {
39  fArray[i] += v.fArray[i];
40  }
41  }
42 
43  void scale(T scale) {
44  for (idx_t i = 0; i < kTotSize; ++i) {
45  fArray[i] *= scale;
46  }
47  }
48 
49  T operator[](idx_t xx) const { return fArray[xx]; }
50  T& operator[](idx_t xx) { return fArray[xx]; }
51 
52  const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
53 
54  T& At(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
55 
56  T& operator()(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
57  const T& operator()(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
58 
59  Matriplex& operator=(const Matriplex& m) {
60  memcpy(fArray, m.fArray, sizeof(T) * kTotSize);
61  return *this;
62  }
63 
64  Matriplex& operator=(T t) {
65  for (idx_t i = 0; i < kTotSize; ++i)
66  fArray[i] = t;
67  return *this;
68  }
69 
70  Matriplex& operator+=(T t) {
71  for (idx_t i = 0; i < kTotSize; ++i)
72  fArray[i] += t;
73  return *this;
74  }
75 
76  Matriplex& operator-=(T t) {
77  for (idx_t i = 0; i < kTotSize; ++i)
78  fArray[i] -= t;
79  return *this;
80  }
81 
82  Matriplex& operator*=(T t) {
83  for (idx_t i = 0; i < kTotSize; ++i)
84  fArray[i] *= t;
85  return *this;
86  }
87 
88  Matriplex& operator/=(T t) {
89  for (idx_t i = 0; i < kTotSize; ++i)
90  fArray[i] /= t;
91  return *this;
92  }
93 
94  Matriplex& operator+=(const Matriplex& a) {
95  for (idx_t i = 0; i < kTotSize; ++i)
96  fArray[i] += a.fArray[i];
97  return *this;
98  }
99 
100  Matriplex& operator-=(const Matriplex& a) {
101  for (idx_t i = 0; i < kTotSize; ++i)
102  fArray[i] -= a.fArray[i];
103  return *this;
104  }
105 
106  Matriplex& operator*=(const Matriplex& a) {
107  for (idx_t i = 0; i < kTotSize; ++i)
108  fArray[i] *= a.fArray[i];
109  return *this;
110  }
111 
112  Matriplex& operator/=(const Matriplex& a) {
113  for (idx_t i = 0; i < kTotSize; ++i)
114  fArray[i] /= a.fArray[i];
115  return *this;
116  }
117 
118  Matriplex& sqrt(const Matriplex& a) {
119  for (idx_t i = 0; i < kTotSize; ++i)
120  fArray[i] = std::sqrt(a.fArray[i]);
121  return *this;
122  }
123  Matriplex& sqrt() {
124  for (idx_t i = 0; i < kTotSize; ++i)
125  fArray[i] = std::sqrt(fArray[i]);
126  return *this;
127  }
128 
129  Matriplex& sqr(const Matriplex& a) {
130  for (idx_t i = 0; i < kTotSize; ++i)
131  fArray[i] = a.fArray[i] * a.fArray[i];
132  return *this;
133  }
134  Matriplex& sqr() {
135  for (idx_t i = 0; i < kTotSize; ++i)
136  fArray[i] = fArray[i] * fArray[i];
137  return *this;
138  }
139 
140  Matriplex& hypot(const Matriplex& a, const Matriplex& b) {
141  for (idx_t i = 0; i < kTotSize; ++i) {
142  fArray[i] = a.fArray[i] * a.fArray[i] + b.fArray[i] * b.fArray[i];
143  }
144  return sqrt();
145  }
146 
147  Matriplex& sin(const Matriplex& a) {
148  for (idx_t i = 0; i < kTotSize; ++i)
149  fArray[i] = std::sin(a.fArray[i]);
150  return *this;
151  }
152  Matriplex& sin() {
153  for (idx_t i = 0; i < kTotSize; ++i)
154  fArray[i] = std::sin(fArray[i]);
155  return *this;
156  }
157 
158  Matriplex& cos(const Matriplex& a) {
159  for (idx_t i = 0; i < kTotSize; ++i)
160  fArray[i] = std::cos(a.fArray[i]);
161  return *this;
162  }
163  Matriplex& cos() {
164  for (idx_t i = 0; i < kTotSize; ++i)
165  fArray[i] = std::cos(fArray[i]);
166  return *this;
167  }
168 
169  Matriplex& tan(const Matriplex& a) {
170  for (idx_t i = 0; i < kTotSize; ++i)
171  fArray[i] = std::tan(a.fArray[i]);
172  return *this;
173  }
174  Matriplex& tan() {
175  for (idx_t i = 0; i < kTotSize; ++i)
176  fArray[i] = std::tan(fArray[i]);
177  return *this;
178  }
179 
180  //---------------------------------------------------------
181 
182  void copySlot(idx_t n, const Matriplex& m) {
183  for (idx_t i = n; i < kTotSize; i += N) {
184  fArray[i] = m.fArray[i];
185  }
186  }
187 
188  void copyIn(idx_t n, const T* arr) {
189  for (idx_t i = n; i < kTotSize; i += N) {
190  fArray[i] = *(arr++);
191  }
192  }
193 
194  void copyIn(idx_t n, const Matriplex& m, idx_t in) {
195  for (idx_t i = n; i < kTotSize; i += N, in += N) {
196  fArray[i] = m[in];
197  }
198  }
199 
200  void copy(idx_t n, idx_t in) {
201  for (idx_t i = n; i < kTotSize; i += N, in += N) {
202  fArray[i] = fArray[in];
203  }
204  }
205 
206 #if defined(AVX512_INTRINSICS)
207 
208  template <typename U>
209  void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
210  //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
211 
212  const __m512 src = {0};
213  const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
214 
215  for (int i = 0; i < kSize; ++i, ++arr) {
216  //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
217 
218  __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
219  _mm512_mask_store_ps(&fArray[i * N], k, reg);
220  }
221  }
222 
223  // Experimental methods, slurpIn() seems to be at least as fast.
224  // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
225  void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
226  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
227 
228  for (int i = 0; i < N; ++i) {
229  __m512 reg = _mm512_load_ps(arr + vi[i]);
230  _mm512_store_ps((void*)(tmp + 64 * i), reg);
231  }
232 
233  for (int i = 0; i < kSize; ++i) {
234  __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
235  _mm512_store_ps(&fArray[i * N], reg);
236  }
237  }
238 
239  void Contaginate(const char* arr, int vi[N], const char* tmp) {
240  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
241 
242  for (int i = 0; i < N; ++i) {
243  __m512 reg = _mm512_load_ps(arr + vi[i]);
244  _mm512_store_ps((void*)(tmp + 64 * i), reg);
245  }
246  }
247 
248  void Plexify(const char* tmp, __m512i& ui) {
249  for (int i = 0; i < kSize; ++i) {
250  __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
251  _mm512_store_ps(&fArray[i * N], reg);
252  }
253  }
254 
255 #elif defined(AVX2_INTRINSICS)
256 
257  template <typename U>
258  void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
259  // Casts to float* needed to "support" also T=HitOnTrack.
260  // Note that sizeof(float) == sizeof(HitOnTrack) == 4.
261 
262  const __m256 src = {0};
263 
264  __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
265  __m256i k_sel = _mm256_set1_epi32(N_proc);
266  __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
267 
268  k = k_master;
269  for (int i = 0; i < kSize; ++i, ++arr) {
270  __m256 reg = _mm256_mask_i32gather_ps(src, (float*)arr, vi, (__m256)k, sizeof(U));
271  // Restore mask (docs say gather clears it but it doesn't seem to).
272  k = k_master;
273  _mm256_maskstore_ps((float*)&fArray[i * N], k, reg);
274  }
275  }
276 
277 #else
278 
279  void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
280  // Separate N_proc == N case (gains about 7% in fit test).
281  if (N_proc == N) {
282  for (int i = 0; i < kSize; ++i) {
283  for (int j = 0; j < N; ++j) {
284  fArray[i * N + j] = *(arr + i + vi[j]);
285  }
286  }
287  } else {
288  for (int i = 0; i < kSize; ++i) {
289  for (int j = 0; j < N_proc; ++j) {
290  fArray[i * N + j] = *(arr + i + vi[j]);
291  }
292  }
293  }
294  }
295 
296 #endif
297 
298  void copyOut(idx_t n, T* arr) const {
299  for (idx_t i = n; i < kTotSize; i += N) {
300  *(arr++) = fArray[i];
301  }
302  }
303 
304  Matriplex<T, 1, 1, N> ReduceFixedIJ(idx_t i, idx_t j) const {
305  Matriplex<T, 1, 1, N> t;
306  for (idx_t n = 0; n < N; ++n) {
307  t[n] = constAt(n, i, j);
308  }
309  return t;
310  }
311  };
Basic3DVector & operator*=(T t)
Scaling by a scalar value (multiplication)
Divides< B, C > D2
Definition: Factorize.h:137
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:417
MPlex< T, D1, D2, N > sin(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:423
Basic3DVector & operator-=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > sqr(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:411
Divides< A, C > D1
Definition: Factorize.h:136
#define N
Definition: blowfish.cc:9
Basic3DVector & operator/=(T t)
Scaling by a scalar value (division)
MPlex< T, D1, D2, N > tan(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:443
double b
Definition: hdecay.h:120
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
double a
Definition: hdecay.h:121
T operator[](int i) const
tmp
align.sh
Definition: createJobs.py:716
long double T
Basic3DVector & operator+=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > cos(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:429
MPlex< T, D1, D2, N > sqrt(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:405

◆ align_check()

void Matriplex::align_check ( const char *  pref,
void *  adr 
)

Definition at line 4 of file MatriplexCommon.cc.

Referenced by mkfit::MkFitter::checkAlignment().

4  {
5  printf("%s 0x%llx - modulo 64 = %lld\n", pref, (long long unsigned)adr, (long long)adr % 64);
6  }

◆ aligned_alloc64()

void* Matriplex::aligned_alloc64 ( std::size_t  size)
inline

Definition at line 13 of file Memory.h.

References aligned_alloc(), and round_up_align64().

Referenced by mkfit::Pool< mkfit::MkFitter >::create(), and Matriplex::MatriplexVector< MP >::MatriplexVector().

size
Write out results.
void * aligned_alloc(size_t alignment, size_t size) noexcept
constexpr std::size_t round_up_align64(std::size_t size)
Definition: Memory.h:8

◆ cos()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::cos ( const MPlex< T, D1, D2, N > &  a)

Definition at line 429 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), and sincos().

429  {
430  MPlex<T, D1, D2, N> t;
431  return t.cos(a);
432  }
double a
Definition: hdecay.h:121

◆ hypot()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::hypot ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 417 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

Referenced by __attribute__(), mkfit::MkFinder::addBestHit(), RegressionHelper::applyCombinationRegression(), objects.METAnalyzer.METAnalyzer::applyDeltaMet(), mkfit::MkFinder::bkFitFitTracks(), mkfit::MkFinder::bkFitFitTracksBH(), PFEGammaAlgo::calculateEleMVA(), PhotonEnergyCalibratorRun2::calibrate(), ElectronEnergyCalibratorRun2::calibrate(), mkfit::TrackBase::canReachRadius(), MuonGEMBaseHarvestor::computeEfficiency(), DQMGenericClient::computeEfficiency(), JetReCalibrator.Type1METCorrector::correct(), mkfit::TrackBase::d0BeamSpot(), L1JetRecoTreeProducer::doPFMetNoMu(), L1JetRecoTreeProducer::doPUPPIMetNoMu(), HLTRegionalEcalResonanceFilter::doSelection(), pat::PATMuonProducer::embedHighLevel(), pat::PATElectronProducer::embedHighLevel(), trklet::TrackletEventProcessor::event(), Phase2TrackerMonitorDigi::fillITPixelDigiHistos(), LHETablesProducer::fillLHEObjectTable(), Phase2TrackerMonitorDigi::fillOTDigiHistos(), Phase2TrackerValidateDigi::fillSimHitInfo(), mkfit::MkFinder::findCandidates(), GEMEfficiencyAnalyzer::findCSCSegmentCosmics(), GenParticles2HepMCConverter::FourVector(), Point::GetSigmaDeltaMu(), mkfit::kalmanOperation(), mkfit::kalmanPropagateAndComputeChi2(), mkfit::kalmanPropagateAndUpdate(), objects.METAnalyzer::makeGenTkMet(), mkfit::TrackBase::maxReachRadius(), LowPtElectronModifier::modifyObject(), l1tpf::ParametricResolution::operator()(), reco::parser::hypot_f::operator()(), mkfit::MkFitter::printPt(), DeepMETProducer::produce(), DeepMETSonicProducer::produce(), L1TPFMetNoMuProducer::produce(), PseudoTopProducer::produce(), PATTracksToPackedCandidates::produce(), EvtPlaneProducer::produce(), L1FPGATrackProducer::produce(), mkfit::mini_propagators::InitialState::propagate_to_r(), mkfit::mini_propagators::InitialStatePlex::propagate_to_r(), mkfit::propagateHelixToZMPlex(), mkfit::MkBase::propagateTracksToHitR(), mkfit::MkBase::propagateTracksToPCAZ(), trklet::L1TStub::r(), mkfit::TrackBase::rAtZ(), EcalUncalibRecHitWorkerMultiFit::run(), 
pf2pat::IPCutPFCandidateSelectorDefinition::select(), MultiTrackSelector::select(), HIMultiTrackSelector::select(), mkfit::MkFinder::selectHitIndices(), mkfit::MkFinder::selectHitIndicesV2(), pat::LeptonUpdater< T >::setDZ(), EGEtScaleSysModifier::setEcalEnergy(), ElectronEnergyCalibrator::setEcalEnergy(), PhotonEnergyCalibrator::setEnergyAndSystVarations(), JetReCalibrator::setFakeRawMETOnOldMiniAODs(), BJetId::setNNVectorVar(), pat::MET::shiftedP4(), pat::MET::shiftedP4_74x(), XHistogram::splitSegment(), DD4hep_XHistogram::splitSegment(), objects.METAnalyzer::sumXY(), reco::ForwardProton::t(), and mkfit::TrackBase::zAtR().

417  {
418  MPlex<T, D1, D2, N> t;
419  return t.hypot(a, b);
420  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ invertCholesky() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholesky ( MPlexVec< MPlex< T, D, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 126 of file MatriplexVector.h.

References A, mps_fire::i, invertCholesky(), and np.

126  {
127  const int np = n_to_process ? n_to_process : A.size();
128 
129  for (int i = 0; i < np; ++i) {
130  invertCholesky(A[i]);
131  }
132  }
int np
Definition: AMPTWrapper.h:43
void invertCholesky(MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCholesky() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholesky ( MPlex< T, D, D, N > &  A)

◆ invertCholeskySym() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholeskySym ( MPlexVec< MPlexSym< T, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 144 of file MatriplexVector.h.

References A, mps_fire::i, invertCholeskySym(), and np.

144  {
145  const int np = n_to_process ? n_to_process : A.size();
146 
147  for (int i = 0; i < np; ++i) {
148  invertCholeskySym(A[i]);
149  }
150  }
void invertCholeskySym(MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
int np
Definition: AMPTWrapper.h:43
Definition: APVGainStruct.h:7

◆ invertCholeskySym() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCholeskySym ( MPlexSym< T, D, N > &  A)

◆ invertCramer() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramer ( MPlexVec< MPlex< T, D, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 117 of file MatriplexVector.h.

References A, mps_fire::i, invertCramer(), and np.

117  {
118  const int np = n_to_process ? n_to_process : A.size();
119 
120  for (int i = 0; i < np; ++i) {
121  invertCramer(A[i]);
122  }
123  }
int np
Definition: AMPTWrapper.h:43
void invertCramer(MPlexVec< MPlex< T, D, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCramer() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramer ( MPlex< T, D, D, N > &  A,
double *  determ = nullptr 
)

◆ invertCramerSym() [1/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramerSym ( MPlexVec< MPlexSym< T, D, N >> &  A,
int  n_to_process = 0 
)

Definition at line 135 of file MatriplexVector.h.

References A, mps_fire::i, invertCramerSym(), and np.

135  {
136  const int np = n_to_process ? n_to_process : A.size();
137 
138  for (int i = 0; i < np; ++i) {
139  invertCramerSym(A[i]);
140  }
141  }
int np
Definition: AMPTWrapper.h:43
void invertCramerSym(MPlexVec< MPlexSym< T, D, N >> &A, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ invertCramerSym() [2/2]

template<typename T , idx_t D, idx_t N>
void Matriplex::invertCramerSym ( MPlexSym< T, D, N > &  A,
double *  determ = nullptr 
)

◆ max()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::max ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 469 of file Matriplex.h.

References a, b, mps_fire::i, and submitPVValidationJobs::t.

Referenced by min_max().

469  {
470  MPlex<T, D1, D2, N> t;
471  for (idx_t i = 0; i < a.kTotSize; ++i) {
472  t.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
473  }
474  return t;
475  }
MPlex< T, D1, D2, N > max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:469
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ min()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::min ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 460 of file Matriplex.h.

References a, b, mps_fire::i, and submitPVValidationJobs::t.

Referenced by min_max().

460  {
461  MPlex<T, D1, D2, N> t;
462  for (idx_t i = 0; i < a.kTotSize; ++i) {
463  t.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
464  }
465  return t;
466  }
MPlex< T, D1, D2, N > min(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:460
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ min_max()

template<typename T , idx_t D1, idx_t D2, idx_t N>
void Matriplex::min_max ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b,
MPlex< T, D1, D2, N > &  min,
MPlex< T, D1, D2, N > &  max 
)

Definition at line 449 of file Matriplex.h.

References a, b, mps_fire::i, max(), and min().

Referenced by mkfit::MkFinder::selectHitIndicesV2().

452  {
453  for (idx_t i = 0; i < a.kTotSize; ++i) {
454  min.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
455  max.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
456  }
457  }
MPlex< T, D1, D2, N > min(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:460
MPlex< T, D1, D2, N > max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:469
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ multiply() [1/4]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiply ( const MPlexVec< MPlex< T, D1, D2, N >> &  A,
const MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 53 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

56  {
57  assert(A.size() == B.size());
58  assert(A.size() == C.size());
59 
60  const int np = n_to_process ? n_to_process : A.size();
61 
62  for (int i = 0; i < np; ++i) {
63  multiply(A[i], B[i], C[i]);
64  }
65  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiply() [2/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlexVec< MPlexSym< T, D, N >> &  A,
const MPlexVec< MPlexSym< T, D, N >> &  B,
MPlexVec< MPlex< T, D, D, N >> &  C,
int  n_to_process = 0 
)

Definition at line 100 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

103  {
104  assert(A.size() == B.size());
105  assert(A.size() == C.size());
106 
107  const int np = n_to_process ? n_to_process : A.size();
108 
109  for (int i = 0; i < np; ++i) {
110  multiply(A[i], B[i], C[i]);
111  }
112  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiply() [3/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlexSym< T, D, N > &  A,
const MPlexSym< T, D, N > &  B,
MPlex< T, D, D, N > &  C 
)

Definition at line 337 of file MatriplexSym.h.

References A, B, correctionTermsCaloMet_cff::C, and Matriplex::SymMultiplyCls< T, D, N >::multiply().

337  {
338  SymMultiplyCls<T, D, N>::multiply(A, B, C);
339  }
Definition: APVGainStruct.h:7
void multiply(const MPlexSym< T, D, N > &A, const MPlexSym< T, D, N > &B, MPlex< T, D, D, N > &C)
Definition: MatriplexSym.h:337
Definition: APVGainStruct.h:7

◆ multiply() [4/4]

template<typename T , idx_t D, idx_t N>
void Matriplex::multiply ( const MPlex< T, D, D, N > &  A,
const MPlex< T, D, D, N > &  B,
MPlex< T, D, D, N > &  C 
)

Definition at line 627 of file Matriplex.h.

References A, B, correctionTermsCaloMet_cff::C, Matriplex::MultiplyCls< T, D, N >::multiply(), and N.

Referenced by multiply(), multiply3in(), and AlignmentExtendedCorrelationsEntry::operator*=().

627  {
628 #ifdef DEBUG
629  printf("Multipl %d %d\n", D, N);
630 #endif
631 
632  MultiplyCls<T, D, N>::multiply(A, B, C);
633  }
Definition: APVGainStruct.h:7
void multiply(const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
Definition: Matriplex.h:627
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
Definition: APVGainStruct.h:7

◆ multiply3in()

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiply3in ( MPlexVec< MPlex< T, D1, D2, N >> &  A,
MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 83 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiply(), and np.

86  {
87  assert(A.size() == B.size());
88  assert(A.size() == C.size());
89 
90  const int np = n_to_process ? n_to_process : A.size();
91 
92  for (int i = 0; i < np; ++i) {
93  multiply(A[i], B[i], C[i]);
94  multiply(B[i], C[i], A[i]);
95  multiply(C[i], A[i], B[i]);
96  }
97  }
Definition: APVGainStruct.h:7
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
void multiply(const MPlexVec< MPlexSym< T, D, N >> &A, const MPlexVec< MPlexSym< T, D, N >> &B, MPlexVec< MPlex< T, D, D, N >> &C, int n_to_process=0)
Definition: APVGainStruct.h:7

◆ multiplyGeneral() [1/2]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiplyGeneral ( const MPlexVec< MPlex< T, D1, D2, N >> &  A,
const MPlexVec< MPlex< T, D2, D3, N >> &  B,
MPlexVec< MPlex< T, D1, D3, N >> &  C,
int  n_to_process = 0 
)

Definition at line 68 of file MatriplexVector.h.

References A, cms::cuda::assert(), B, correctionTermsCaloMet_cff::C, mps_fire::i, multiplyGeneral(), and np.

71  {
72  assert(A.size() == B.size());
73  assert(A.size() == C.size());
74 
75  const int np = n_to_process ? n_to_process : A.size();
76 
77  for (int i = 0; i < np; ++i) {
78  multiplyGeneral(A[i], B[i], C[i]);
79  }
80  }
Definition: APVGainStruct.h:7
void multiplyGeneral(const MPlexVec< MPlex< T, D1, D2, N >> &A, const MPlexVec< MPlex< T, D2, D3, N >> &B, MPlexVec< MPlex< T, D1, D3, N >> &C, int n_to_process=0)
assert(be >=bs)
int np
Definition: AMPTWrapper.h:43
Definition: APVGainStruct.h:7

◆ multiplyGeneral() [2/2]

template<typename T , idx_t D1, idx_t D2, idx_t D3, idx_t N>
void Matriplex::multiplyGeneral ( const MPlex< T, D1, D2, N > &  A,
const MPlex< T, D2, D3, N > &  B,
MPlex< T, D1, D3, N > &  C 
)

Definition at line 482 of file Matriplex.h.

References A, B, correctionTermsCaloMet_cff::C, trklet::D3, mps_fire::i, dqmiolumiharvest::j, dqmdumpme::k, N, and dqmiodumpmetadata::n.

Referenced by multiplyGeneral().

482  {
483  for (idx_t i = 0; i < D1; ++i) {
484  for (idx_t j = 0; j < D3; ++j) {
485  const idx_t ijo = N * (i * D3 + j);
486 
487 #pragma omp simd
488  for (idx_t n = 0; n < N; ++n) {
489  C.fArray[ijo + n] = 0;
490  }
491 
492  for (idx_t k = 0; k < D2; ++k) {
493  const idx_t iko = N * (i * D2 + k);
494  const idx_t kjo = N * (k * D3 + j);
495 
496 #pragma omp simd
497  for (idx_t n = 0; n < N; ++n) {
498  C.fArray[ijo + n] += A.fArray[iko + n] * B.fArray[kjo + n];
499  }
500  }
501  }
502  }
503  }
Divides< B, C > D2
Definition: Factorize.h:137
Definition: APVGainStruct.h:7
Divides< A, C > D1
Definition: Factorize.h:136
#define N
Definition: blowfish.cc:9
Definition: APVGainStruct.h:7

◆ operator*() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 335 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

335  {
336  MPlex<T, D1, D2, N> t = a;
337  t *= b;
338  return t;
339  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator*() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 363 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

363  {
364  MPlex<T, D1, D2, N> t = a;
365  t *= b;
366  return t;
367  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator*() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator* ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 391 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

391  {
392  MPlex<T, D1, D2, N> t = a;
393  t *= b;
394  return t;
395  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 321 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

321  {
322  MPlex<T, D1, D2, N> t = a;
323  t += b;
324  return t;
325  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 349 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

349  {
350  MPlex<T, D1, D2, N> t = a;
351  t += b;
352  return t;
353  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator+() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator+ ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 377 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

377  {
378  MPlex<T, D1, D2, N> t = a;
379  t += b;
380  return t;
381  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 328 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

328  {
329  MPlex<T, D1, D2, N> t = a;
330  t -= b;
331  return t;
332  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 356 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

356  {
357  MPlex<T, D1, D2, N> t = a;
358  t -= b;
359  return t;
360  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator-() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator- ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 384 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

384  {
385  MPlex<T, D1, D2, N> t = a;
386  t -= b;
387  return t;
388  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [1/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( const MPlex< T, D1, D2, N > &  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 342 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

342  {
343  MPlex<T, D1, D2, N> t = a;
344  t /= b;
345  return t;
346  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [2/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( const MPlex< T, D1, D2, N > &  a,
T  b 
)

Definition at line 370 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

370  {
371  MPlex<T, D1, D2, N> t = a;
372  t /= b;
373  return t;
374  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ operator/() [3/3]

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::operator/ ( T  a,
const MPlex< T, D1, D2, N > &  b 
)

Definition at line 398 of file Matriplex.h.

References a, b, and submitPVValidationJobs::t.

398  {
399  MPlex<T, D1, D2, N> t = a;
400  t /= b;
401  return t;
402  }
double b
Definition: hdecay.h:120
double a
Definition: hdecay.h:121

◆ round_up_align64()

constexpr std::size_t Matriplex::round_up_align64 ( std::size_t  size)

Definition at line 8 of file Memory.h.

References gpuClustering::pixelStatus::mask.

Referenced by aligned_alloc64().

8  {
9  constexpr std::size_t mask = 64 - 1;
10  return size & mask ? (size & ~mask) + 64 : size;
11  }
size
Write out results.
constexpr uint32_t mask
Definition: gpuClustering.h:26

◆ sin()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sin ( const MPlex< T, D1, D2, N > &  a)

Definition at line 423 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), and sincos().

423  {
424  MPlex<T, D1, D2, N> t;
425  return t.sin(a);
426  }
double a
Definition: hdecay.h:121

◆ sincos()

template<typename T , idx_t D1, idx_t D2, idx_t N>
void Matriplex::sincos ( const MPlex< T, D1, D2, N > &  a,
MPlex< T, D1, D2, N > &  s,
MPlex< T, D1, D2, N > &  c 
)

Definition at line 435 of file Matriplex.h.

References a, HltBtagPostValidation_cff::c, cos(), mps_fire::i, alignCSCRings::s, and sin().

435  {
436  for (idx_t i = 0; i < a.kTotSize; ++i) {
437  s.fArray[i] = std::sin(a.fArray[i]);
438  c.fArray[i] = std::cos(a.fArray[i]);
439  }
440  }
MPlex< T, D1, D2, N > sin(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:423
double a
Definition: hdecay.h:121
MPlex< T, D1, D2, N > cos(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:429

◆ sqr()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sqr ( const MPlex< T, D1, D2, N > &  a)

Definition at line 411 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__().

411  {
412  MPlex<T, D1, D2, N> t;
413  return t.sqrt(a);
414  }
double a
Definition: hdecay.h:121

◆ sqrt()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::sqrt ( const MPlex< T, D1, D2, N > &  a)

Definition at line 405 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), and Matriplex::CholeskyInverter< T, 3, N >::invert().

405  {
406  MPlex<T, D1, D2, N> t;
407  return t.sqrt(a);
408  }
double a
Definition: hdecay.h:121

◆ tan()

template<typename T , idx_t D1, idx_t D2, idx_t N>
MPlex<T, D1, D2, N> Matriplex::tan ( const MPlex< T, D1, D2, N > &  a)

Definition at line 443 of file Matriplex.h.

References a, and submitPVValidationJobs::t.

Referenced by __attribute__(), mkfit::helixAtRFromIterativeCCSFullJac(), and mkfit::helixAtZ().

443  {
444  MPlex<T, D1, D2, N> t;
445  return t.tan(a);
446  }
double a
Definition: hdecay.h:121

Variable Documentation

◆ gSymOffsets

const idx_t Matriplex::gSymOffsets[7][36]
Initial value:
= {{},
{},
{0, 1, 1, 2},
{0, 1, 3, 1, 2, 4, 3, 4, 5},
{},
{},
{0, 1, 3, 6, 10, 15, 1, 2, 4, 7, 11, 16, 3, 4, 5, 8, 12, 17,
6, 7, 8, 9, 13, 18, 10, 11, 12, 13, 14, 19, 15, 16, 17, 18, 19, 20}}

Definition at line 13 of file MatriplexSym.h.

Referenced by __attribute__().