CMS 3D CMS Logo

Matriplex.h
Go to the documentation of this file.
1 #ifndef RecoTracker_MkFitCore_src_Matriplex_Matriplex_h
2 #define RecoTracker_MkFitCore_src_Matriplex_Matriplex_h
3 
4 #include "MatriplexCommon.h"
5 
6 #ifdef MPLEX_VDT
7 // define fast_xyzz() version of transcendental methods and operators
8 #ifdef MPLEX_VDT_USE_STD
9 // this falls back to using std:: versions -- for testing and cross checking.
10 namespace std {
11  template <typename T>
12  T isqrt(T x) {
13  return T(1.0) / std::sqrt(x);
14  }
15  template <typename T>
16  void sincos(T a, T& s, T& c) {
17  s = std::sin(a);
18  c = std::cos(a);
19  }
20 } // namespace std
21 #else
22 #include "vdt/sqrt.h"
23 #include "vdt/sin.h"
24 #include "vdt/cos.h"
25 #include "vdt/tan.h"
26 #include "vdt/atan2.h"
27 #endif
28 #endif
29 
30 namespace Matriplex {
31 
32  //------------------------------------------------------------------------------
33 
34  template <typename T, idx_t D1, idx_t D2, idx_t N>
35  class __attribute__((aligned(MPLEX_ALIGN))) Matriplex {
36  public:
37  typedef T value_type;
38 
40  static constexpr int kRows = D1;
42  static constexpr int kCols = D2;
44  static constexpr int kSize = D1 * D2;
46  static constexpr int kTotSize = N * kSize;
47 
48  T fArray[kTotSize];
49 
50  Matriplex() {}
51  Matriplex(T v) { setVal(v); }
52 
53  idx_t plexSize() const { return N; }
54 
55  void setVal(T v) {
56  for (idx_t i = 0; i < kTotSize; ++i) {
57  fArray[i] = v;
58  }
59  }
60 
61  void add(const Matriplex& v) {
62  for (idx_t i = 0; i < kTotSize; ++i) {
63  fArray[i] += v.fArray[i];
64  }
65  }
66 
67  void scale(T scale) {
68  for (idx_t i = 0; i < kTotSize; ++i) {
69  fArray[i] *= scale;
70  }
71  }
72 
73  Matriplex& negate() {
74  for (idx_t i = 0; i < kTotSize; ++i) {
75  fArray[i] = -fArray[i];
76  }
77  return *this;
78  }
79 
80  template <typename TT>
81  Matriplex& negate_if_ltz(const Matriplex<TT, D1, D2, N>& sign) {
82  for (idx_t i = 0; i < kTotSize; ++i) {
83  if (sign.fArray[i] < 0)
84  fArray[i] = -fArray[i];
85  }
86  return *this;
87  }
88 
89  T operator[](idx_t xx) const { return fArray[xx]; }
90  T& operator[](idx_t xx) { return fArray[xx]; }
91 
92  const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
93 
94  T& At(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
95 
96  T& operator()(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
97  const T& operator()(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
98 
99  // reduction operators
100 
101  using QReduced = Matriplex<T, 1, 1, N>;
102 
103  QReduced ReduceFixedIJ(idx_t i, idx_t j) const {
104  QReduced t;
105  for (idx_t n = 0; n < N; ++n) {
106  t[n] = constAt(n, i, j);
107  }
108  return t;
109  }
110  QReduced rij(idx_t i, idx_t j) const { return ReduceFixedIJ(i, j); }
111  QReduced operator()(idx_t i, idx_t j) const { return ReduceFixedIJ(i, j); }
112 
113  struct QAssigner {
114  Matriplex& m_matriplex;
115  const int m_i, m_j;
116 
117  QAssigner(Matriplex& m, int i, int j) : m_matriplex(m), m_i(i), m_j(j) {}
118  Matriplex& operator=(const QReduced& qvec) {
119  for (idx_t n = 0; n < N; ++n) {
120  m_matriplex(n, m_i, m_j) = qvec[n];
121  }
122  return m_matriplex;
123  }
124  Matriplex& operator=(T qscalar) {
125  for (idx_t n = 0; n < N; ++n) {
126  m_matriplex(n, m_i, m_j) = qscalar;
127  }
128  return m_matriplex;
129  }
130  };
131 
132  QAssigner AssignFixedIJ(idx_t i, idx_t j) { return QAssigner(*this, i, j); }
133  QAssigner aij(idx_t i, idx_t j) { return AssignFixedIJ(i, j); }
134 
135  // assignment operators
136 
137  Matriplex& operator=(T t) {
138  for (idx_t i = 0; i < kTotSize; ++i)
139  fArray[i] = t;
140  return *this;
141  }
142 
143  Matriplex& operator+=(T t) {
144  for (idx_t i = 0; i < kTotSize; ++i)
145  fArray[i] += t;
146  return *this;
147  }
148 
149  Matriplex& operator-=(T t) {
150  for (idx_t i = 0; i < kTotSize; ++i)
151  fArray[i] -= t;
152  return *this;
153  }
154 
155  Matriplex& operator*=(T t) {
156  for (idx_t i = 0; i < kTotSize; ++i)
157  fArray[i] *= t;
158  return *this;
159  }
160 
161  Matriplex& operator/=(T t) {
162  for (idx_t i = 0; i < kTotSize; ++i)
163  fArray[i] /= t;
164  return *this;
165  }
166 
167  Matriplex& operator+=(const Matriplex& a) {
168  for (idx_t i = 0; i < kTotSize; ++i)
169  fArray[i] += a.fArray[i];
170  return *this;
171  }
172 
173  Matriplex& operator-=(const Matriplex& a) {
174  for (idx_t i = 0; i < kTotSize; ++i)
175  fArray[i] -= a.fArray[i];
176  return *this;
177  }
178 
179  Matriplex& operator*=(const Matriplex& a) {
180  for (idx_t i = 0; i < kTotSize; ++i)
181  fArray[i] *= a.fArray[i];
182  return *this;
183  }
184 
185  Matriplex& operator/=(const Matriplex& a) {
186  for (idx_t i = 0; i < kTotSize; ++i)
187  fArray[i] /= a.fArray[i];
188  return *this;
189  }
190 
191  Matriplex operator-() {
192  Matriplex t;
193  for (idx_t i = 0; i < kTotSize; ++i)
194  t.fArray[i] = -fArray[i];
195  return t;
196  }
197 
198  Matriplex& abs(const Matriplex& a) {
199  for (idx_t i = 0; i < kTotSize; ++i)
200  fArray[i] = std::abs(a.fArray[i]);
201  return *this;
202  }
203  Matriplex& abs() {
204  for (idx_t i = 0; i < kTotSize; ++i)
205  fArray[i] = std::abs(fArray[i]);
206  return *this;
207  }
208 
209  Matriplex& sqr(const Matriplex& a) {
210  for (idx_t i = 0; i < kTotSize; ++i)
211  fArray[i] = a.fArray[i] * a.fArray[i];
212  return *this;
213  }
214  Matriplex& sqr() {
215  for (idx_t i = 0; i < kTotSize; ++i)
216  fArray[i] = fArray[i] * fArray[i];
217  return *this;
218  }
219 
220  //---------------------------------------------------------
221  // transcendentals, std version
222 
223  Matriplex& sqrt(const Matriplex& a) {
224  for (idx_t i = 0; i < kTotSize; ++i)
225  fArray[i] = std::sqrt(a.fArray[i]);
226  return *this;
227  }
228  Matriplex& sqrt() {
229  for (idx_t i = 0; i < kTotSize; ++i)
230  fArray[i] = std::sqrt(fArray[i]);
231  return *this;
232  }
233 
234  Matriplex& hypot(const Matriplex& a, const Matriplex& b) {
235  for (idx_t i = 0; i < kTotSize; ++i) {
236  fArray[i] = a.fArray[i] * a.fArray[i] + b.fArray[i] * b.fArray[i];
237  }
238  return sqrt();
239  }
240 
241  Matriplex& sin(const Matriplex& a) {
242  for (idx_t i = 0; i < kTotSize; ++i)
243  fArray[i] = std::sin(a.fArray[i]);
244  return *this;
245  }
246  Matriplex& sin() {
247  for (idx_t i = 0; i < kTotSize; ++i)
248  fArray[i] = std::sin(fArray[i]);
249  return *this;
250  }
251 
252  Matriplex& cos(const Matriplex& a) {
253  for (idx_t i = 0; i < kTotSize; ++i)
254  fArray[i] = std::cos(a.fArray[i]);
255  return *this;
256  }
257  Matriplex& cos() {
258  for (idx_t i = 0; i < kTotSize; ++i)
259  fArray[i] = std::cos(fArray[i]);
260  return *this;
261  }
262 
263  Matriplex& tan(const Matriplex& a) {
264  for (idx_t i = 0; i < kTotSize; ++i)
265  fArray[i] = std::tan(a.fArray[i]);
266  return *this;
267  }
268  Matriplex& tan() {
269  for (idx_t i = 0; i < kTotSize; ++i)
270  fArray[i] = std::tan(fArray[i]);
271  return *this;
272  }
273 
274  Matriplex& atan2(const Matriplex& y, const Matriplex& x) {
275  for (idx_t i = 0; i < kTotSize; ++i)
276  fArray[i] = std::atan2(y.fArray[i], x.fArray[i]);
277  return *this;
278  }
279 
280  //---------------------------------------------------------
281  // transcendentals, vdt version
282 
283 #ifdef MPLEX_VDT
284 
285 #define ASS fArray[i] =
286 #define ARR fArray[i]
287 #define A_ARR a.fArray[i]
288 
289 #ifdef MPLEX_VDT_USE_STD
290 #define VDT_INVOKE(_ass_, _func_, ...) \
291  for (idx_t i = 0; i < kTotSize; ++i) \
292  _ass_ std::_func_(__VA_ARGS__);
293 #else
294 #define VDT_INVOKE(_ass_, _func_, ...) \
295  for (idx_t i = 0; i < kTotSize; ++i) \
296  if constexpr (std::is_same<T, float>()) \
297  _ass_ vdt::fast_##_func_##f(__VA_ARGS__); \
298  else \
299  _ass_ vdt::fast_##_func_(__VA_ARGS__);
300 #endif
301 
302  Matriplex& fast_isqrt(const Matriplex& a) {
303  VDT_INVOKE(ASS, isqrt, A_ARR);
304  return *this;
305  }
306  Matriplex& fast_isqrt() {
307  VDT_INVOKE(ASS, isqrt, ARR);
308  return *this;
309  }
310 
311  Matriplex& fast_sin(const Matriplex& a) {
312  VDT_INVOKE(ASS, sin, A_ARR);
313  return *this;
314  }
315  Matriplex& fast_sin() {
316  VDT_INVOKE(ASS, sin, ARR);
317  return *this;
318  }
319 
320  Matriplex& fast_cos(const Matriplex& a) {
321  VDT_INVOKE(ASS, cos, A_ARR);
322  return *this;
323  }
324  Matriplex& fast_cos() {
325  VDT_INVOKE(ASS, cos, ARR);
326  return *this;
327  }
328 
329  void fast_sincos(Matriplex& s, Matriplex& c) const { VDT_INVOKE(, sincos, ARR, s.fArray[i], c.fArray[i]); }
330 
331  Matriplex& fast_tan(const Matriplex& a) {
332  VDT_INVOKE(ASS, tan, A_ARR);
333  return *this;
334  }
335  Matriplex& fast_tan() {
336  VDT_INVOKE(ASS, tan, ARR);
337  return *this;
338  }
339 
340  Matriplex& fast_atan2(const Matriplex& y, const Matriplex& x) {
341  VDT_INVOKE(ASS, atan2, y.fArray[i], x.fArray[i]);
342  return *this;
343  }
344 
345 #undef VDT_INVOKE
346 
347 #undef ASS
348 #undef ARR
349 #undef A_ARR
350 #endif
351 
352  void sincos4(Matriplex& s, Matriplex& c) const {
353  for (idx_t i = 0; i < kTotSize; ++i)
354  internal::sincos4(fArray[i], s.fArray[i], c.fArray[i]);
355  }
356 
357  //---------------------------------------------------------
358 
359  void copySlot(idx_t n, const Matriplex& m) {
360  for (idx_t i = n; i < kTotSize; i += N) {
361  fArray[i] = m.fArray[i];
362  }
363  }
364 
365  void copyIn(idx_t n, const T* arr) {
366  for (idx_t i = n; i < kTotSize; i += N) {
367  fArray[i] = *(arr++);
368  }
369  }
370 
371  void copyIn(idx_t n, const Matriplex& m, idx_t in) {
372  for (idx_t i = n; i < kTotSize; i += N, in += N) {
373  fArray[i] = m[in];
374  }
375  }
376 
377  void copy(idx_t n, idx_t in) {
378  for (idx_t i = n; i < kTotSize; i += N, in += N) {
379  fArray[i] = fArray[in];
380  }
381  }
382 
383 #if defined(AVX512_INTRINSICS)
384 
385  template <typename U>
386  void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
387  //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
388 
389  const __m512 src = {0};
390  const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
391 
392  for (int i = 0; i < kSize; ++i, ++arr) {
393  //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
394 
395  __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
396  _mm512_mask_store_ps(&fArray[i * N], k, reg);
397  }
398  }
399 
400  // Experimental methods, slurpIn() seems to be at least as fast.
401  // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
402  void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
403  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
404 
405  for (int i = 0; i < N; ++i) {
406  __m512 reg = _mm512_load_ps(arr + vi[i]);
407  _mm512_store_ps((void*)(tmp + 64 * i), reg);
408  }
409 
410  for (int i = 0; i < kSize; ++i) {
411  __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
412  _mm512_store_ps(&fArray[i * N], reg);
413  }
414  }
415 
416  void Contaginate(const char* arr, int vi[N], const char* tmp) {
417  // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
418 
419  for (int i = 0; i < N; ++i) {
420  __m512 reg = _mm512_load_ps(arr + vi[i]);
421  _mm512_store_ps((void*)(tmp + 64 * i), reg);
422  }
423  }
424 
425  void Plexify(const char* tmp, __m512i& ui) {
426  for (int i = 0; i < kSize; ++i) {
427  __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
428  _mm512_store_ps(&fArray[i * N], reg);
429  }
430  }
431 
432 #elif defined(AVX2_INTRINSICS)
433 
434  template <typename U>
435  void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
436  // Casts to float* needed to "support" also T=HitOnTrack.
437  // Note that sizeof(float) == sizeof(HitOnTrack) == 4.
438 
439  const __m256 src = {0};
440 
441  __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
442  __m256i k_sel = _mm256_set1_epi32(N_proc);
443  __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
444 
445  k = k_master;
446  for (int i = 0; i < kSize; ++i, ++arr) {
447  __m256 reg = _mm256_mask_i32gather_ps(src, (float*)arr, vi, (__m256)k, sizeof(U));
448  // Restore mask (docs say gather clears it but it doesn't seem to).
449  k = k_master;
450  _mm256_maskstore_ps((float*)&fArray[i * N], k, reg);
451  }
452  }
453 
454 #else
455 
456  void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
457  // Separate N_proc == N case (gains about 7% in fit test).
458  if (N_proc == N) {
459  for (int i = 0; i < kSize; ++i) {
460  for (int j = 0; j < N; ++j) {
461  fArray[i * N + j] = *(arr + i + vi[j]);
462  }
463  }
464  } else {
465  for (int i = 0; i < kSize; ++i) {
466  for (int j = 0; j < N_proc; ++j) {
467  fArray[i * N + j] = *(arr + i + vi[j]);
468  }
469  }
470  }
471  }
472 
473 #endif
474 
475  void copyOut(idx_t n, T* arr) const {
476  for (idx_t i = n; i < kTotSize; i += N) {
477  *(arr++) = fArray[i];
478  }
479  }
480  };
481 
482  template <typename T, idx_t D1, idx_t D2, idx_t N>
483  using MPlex = Matriplex<T, D1, D2, N>;
484 
485  //==============================================================================
486  // Operators
487  //==============================================================================
488 
489  template <typename T, idx_t D1, idx_t D2, idx_t N>
492  t.negate();
493  return t;
494  }
495 
496  template <typename T, idx_t D1, idx_t D2, idx_t N>
499  t.negate();
500  return t;
501  }
502 
503  template <typename T, typename TT, idx_t D1, idx_t D2, idx_t N>
506  t.negate_if_ltz(sign);
507  return t;
508  }
509 
510  template <typename T, idx_t D1, idx_t D2, idx_t N>
513  t += b;
514  return t;
515  }
516 
517  template <typename T, idx_t D1, idx_t D2, idx_t N>
520  t -= b;
521  return t;
522  }
523 
524  template <typename T, idx_t D1, idx_t D2, idx_t N>
527  t *= b;
528  return t;
529  }
530 
531  template <typename T, idx_t D1, idx_t D2, idx_t N>
534  t /= b;
535  return t;
536  }
537 
538  template <typename T, idx_t D1, idx_t D2, idx_t N>
541  t += b;
542  return t;
543  }
544 
545  template <typename T, idx_t D1, idx_t D2, idx_t N>
548  t -= b;
549  return t;
550  }
551 
552  template <typename T, idx_t D1, idx_t D2, idx_t N>
555  t *= b;
556  return t;
557  }
558 
559  template <typename T, idx_t D1, idx_t D2, idx_t N>
562  t /= b;
563  return t;
564  }
565 
566  template <typename T, idx_t D1, idx_t D2, idx_t N>
569  t += b;
570  return t;
571  }
572 
573  template <typename T, idx_t D1, idx_t D2, idx_t N>
576  t -= b;
577  return t;
578  }
579 
580  template <typename T, idx_t D1, idx_t D2, idx_t N>
583  t *= b;
584  return t;
585  }
586 
587  template <typename T, idx_t D1, idx_t D2, idx_t N>
590  t /= b;
591  return t;
592  }
593 
594  template <typename T, idx_t D1, idx_t D2, idx_t N>
597  return t.abs(a);
598  }
599 
600  template <typename T, idx_t D1, idx_t D2, idx_t N>
603  return t.sqr(a);
604  }
605 
606  //---------------------------------------------------------
607  // transcendentals, std version
608 
609  template <typename T, idx_t D1, idx_t D2, idx_t N>
612  return t.sqrt(a);
613  }
614 
615  template <typename T, idx_t D1, idx_t D2, idx_t N>
618  return t.hypot(a, b);
619  }
620 
621  template <typename T, idx_t D1, idx_t D2, idx_t N>
624  return t.sin(a);
625  }
626 
627  template <typename T, idx_t D1, idx_t D2, idx_t N>
630  return t.cos(a);
631  }
632 
633  template <typename T, idx_t D1, idx_t D2, idx_t N>
635  for (idx_t i = 0; i < a.kTotSize; ++i) {
636  s.fArray[i] = std::sin(a.fArray[i]);
637  c.fArray[i] = std::cos(a.fArray[i]);
638  }
639  }
640 
641  template <typename T, idx_t D1, idx_t D2, idx_t N>
644  return t.tan(a);
645  }
646 
647  template <typename T, idx_t D1, idx_t D2, idx_t N>
650  return t.atan2(y, x);
651  }
652 
653  //---------------------------------------------------------
654  // transcendentals, vdt version
655 
656 #ifdef MPLEX_VDT
657 
658  template <typename T, idx_t D1, idx_t D2, idx_t N>
659  MPlex<T, D1, D2, N> fast_isqrt(const MPlex<T, D1, D2, N>& a) {
660  MPlex<T, D1, D2, N> t;
661  return t.fast_isqrt(a);
662  }
663 
664  template <typename T, idx_t D1, idx_t D2, idx_t N>
665  MPlex<T, D1, D2, N> fast_sin(const MPlex<T, D1, D2, N>& a) {
666  MPlex<T, D1, D2, N> t;
667  return t.fast_sin(a);
668  }
669 
670  template <typename T, idx_t D1, idx_t D2, idx_t N>
671  MPlex<T, D1, D2, N> fast_cos(const MPlex<T, D1, D2, N>& a) {
672  MPlex<T, D1, D2, N> t;
673  return t.fast_cos(a);
674  }
675 
676  template <typename T, idx_t D1, idx_t D2, idx_t N>
677  void fast_sincos(const MPlex<T, D1, D2, N>& a, MPlex<T, D1, D2, N>& s, MPlex<T, D1, D2, N>& c) {
678  a.fast_sincos(s, c);
679  }
680 
681  template <typename T, idx_t D1, idx_t D2, idx_t N>
682  MPlex<T, D1, D2, N> fast_tan(const MPlex<T, D1, D2, N>& a) {
683  MPlex<T, D1, D2, N> t;
684  return t.fast_tan(a);
685  }
686 
687  template <typename T, idx_t D1, idx_t D2, idx_t N>
688  MPlex<T, D1, D2, N> fast_atan2(const MPlex<T, D1, D2, N>& y, const MPlex<T, D1, D2, N>& x) {
689  MPlex<T, D1, D2, N> t;
690  return t.fast_atan2(y, x);
691  }
692 
693 #endif
694 
695  template <typename T, idx_t D1, idx_t D2, idx_t N>
697  a.sincos4(s, c);
698  }
699 
700  //---------------------------------------------------------
701 
702  template <typename T, idx_t D1, idx_t D2, idx_t N>
704  const MPlex<T, D1, D2, N>& b,
707  for (idx_t i = 0; i < a.kTotSize; ++i) {
708  min.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
709  max.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
710  }
711  }
712 
713  template <typename T, idx_t D1, idx_t D2, idx_t N>
716  for (idx_t i = 0; i < a.kTotSize; ++i) {
717  t.fArray[i] = std::min(a.fArray[i], b.fArray[i]);
718  }
719  return t;
720  }
721 
722  template <typename T, idx_t D1, idx_t D2, idx_t N>
725  for (idx_t i = 0; i < a.kTotSize; ++i) {
726  t.fArray[i] = std::max(a.fArray[i], b.fArray[i]);
727  }
728  return t;
729  }
730 
731  //==============================================================================
732  // Multiplications
733  //==============================================================================
734 
735  template <typename T, idx_t D1, idx_t D2, idx_t D3, idx_t N>
737  for (idx_t i = 0; i < D1; ++i) {
738  for (idx_t j = 0; j < D3; ++j) {
739  const idx_t ijo = N * (i * D3 + j);
740 
741 #pragma omp simd
742  for (idx_t n = 0; n < N; ++n) {
743  C.fArray[ijo + n] = 0;
744  }
745 
746  for (idx_t k = 0; k < D2; ++k) {
747  const idx_t iko = N * (i * D2 + k);
748  const idx_t kjo = N * (k * D3 + j);
749 
750 #pragma omp simd
751  for (idx_t n = 0; n < N; ++n) {
752  C.fArray[ijo + n] += A.fArray[iko + n] * B.fArray[kjo + n];
753  }
754  }
755  }
756  }
757  }
758 
759  //------------------------------------------------------------------------------
760 
761  template <typename T, idx_t D, idx_t N>
762  struct MultiplyCls {
764  throw std::runtime_error("general multiplication not supported, well, call multiplyGeneral()");
765  }
766  };
767 
768  template <typename T, idx_t N>
769  struct MultiplyCls<T, 3, N> {
771  const T* a = A.fArray;
772  ASSUME_ALIGNED(a, 64);
773  const T* b = B.fArray;
774  ASSUME_ALIGNED(b, 64);
775  T* c = C.fArray;
776  ASSUME_ALIGNED(c, 64);
777 
778 #pragma omp simd
779  for (idx_t n = 0; n < N; ++n) {
780  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[2 * N + n] * b[6 * N + n];
781  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[7 * N + n];
782  c[2 * N + n] = a[0 * N + n] * b[2 * N + n] + a[1 * N + n] * b[5 * N + n] + a[2 * N + n] * b[8 * N + n];
783  c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
784  c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
785  c[5 * N + n] = a[3 * N + n] * b[2 * N + n] + a[4 * N + n] * b[5 * N + n] + a[5 * N + n] * b[8 * N + n];
786  c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
787  c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
788  c[8 * N + n] = a[6 * N + n] * b[2 * N + n] + a[7 * N + n] * b[5 * N + n] + a[8 * N + n] * b[8 * N + n];
789  }
790  }
791  };
792 
793  template <typename T, idx_t N>
794  struct MultiplyCls<T, 6, N> {
796  const T* a = A.fArray;
797  ASSUME_ALIGNED(a, 64);
798  const T* b = B.fArray;
799  ASSUME_ALIGNED(b, 64);
800  T* c = C.fArray;
801  ASSUME_ALIGNED(c, 64);
802 #pragma omp simd
803  for (idx_t n = 0; n < N; ++n) {
804  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[6 * N + n] + a[2 * N + n] * b[12 * N + n] +
805  a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[24 * N + n] + a[5 * N + n] * b[30 * N + n];
806  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[13 * N + n] +
807  a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[25 * N + n] + a[5 * N + n] * b[31 * N + n];
808  c[2 * N + n] = a[0 * N + n] * b[2 * N + n] + a[1 * N + n] * b[8 * N + n] + a[2 * N + n] * b[14 * N + n] +
809  a[3 * N + n] * b[20 * N + n] + a[4 * N + n] * b[26 * N + n] + a[5 * N + n] * b[32 * N + n];
810  c[3 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[9 * N + n] + a[2 * N + n] * b[15 * N + n] +
811  a[3 * N + n] * b[21 * N + n] + a[4 * N + n] * b[27 * N + n] + a[5 * N + n] * b[33 * N + n];
812  c[4 * N + n] = a[0 * N + n] * b[4 * N + n] + a[1 * N + n] * b[10 * N + n] + a[2 * N + n] * b[16 * N + n] +
813  a[3 * N + n] * b[22 * N + n] + a[4 * N + n] * b[28 * N + n] + a[5 * N + n] * b[34 * N + n];
814  c[5 * N + n] = a[0 * N + n] * b[5 * N + n] + a[1 * N + n] * b[11 * N + n] + a[2 * N + n] * b[17 * N + n] +
815  a[3 * N + n] * b[23 * N + n] + a[4 * N + n] * b[29 * N + n] + a[5 * N + n] * b[35 * N + n];
816  c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[6 * N + n] + a[8 * N + n] * b[12 * N + n] +
817  a[9 * N + n] * b[18 * N + n] + a[10 * N + n] * b[24 * N + n] + a[11 * N + n] * b[30 * N + n];
818  c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[13 * N + n] +
819  a[9 * N + n] * b[19 * N + n] + a[10 * N + n] * b[25 * N + n] + a[11 * N + n] * b[31 * N + n];
820  c[8 * N + n] = a[6 * N + n] * b[2 * N + n] + a[7 * N + n] * b[8 * N + n] + a[8 * N + n] * b[14 * N + n] +
821  a[9 * N + n] * b[20 * N + n] + a[10 * N + n] * b[26 * N + n] + a[11 * N + n] * b[32 * N + n];
822  c[9 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[9 * N + n] + a[8 * N + n] * b[15 * N + n] +
823  a[9 * N + n] * b[21 * N + n] + a[10 * N + n] * b[27 * N + n] + a[11 * N + n] * b[33 * N + n];
824  c[10 * N + n] = a[6 * N + n] * b[4 * N + n] + a[7 * N + n] * b[10 * N + n] + a[8 * N + n] * b[16 * N + n] +
825  a[9 * N + n] * b[22 * N + n] + a[10 * N + n] * b[28 * N + n] + a[11 * N + n] * b[34 * N + n];
826  c[11 * N + n] = a[6 * N + n] * b[5 * N + n] + a[7 * N + n] * b[11 * N + n] + a[8 * N + n] * b[17 * N + n] +
827  a[9 * N + n] * b[23 * N + n] + a[10 * N + n] * b[29 * N + n] + a[11 * N + n] * b[35 * N + n];
828  c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[6 * N + n] + a[14 * N + n] * b[12 * N + n] +
829  a[15 * N + n] * b[18 * N + n] + a[16 * N + n] * b[24 * N + n] + a[17 * N + n] * b[30 * N + n];
830  c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[13 * N + n] +
831  a[15 * N + n] * b[19 * N + n] + a[16 * N + n] * b[25 * N + n] + a[17 * N + n] * b[31 * N + n];
832  c[14 * N + n] = a[12 * N + n] * b[2 * N + n] + a[13 * N + n] * b[8 * N + n] + a[14 * N + n] * b[14 * N + n] +
833  a[15 * N + n] * b[20 * N + n] + a[16 * N + n] * b[26 * N + n] + a[17 * N + n] * b[32 * N + n];
834  c[15 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[9 * N + n] + a[14 * N + n] * b[15 * N + n] +
835  a[15 * N + n] * b[21 * N + n] + a[16 * N + n] * b[27 * N + n] + a[17 * N + n] * b[33 * N + n];
836  c[16 * N + n] = a[12 * N + n] * b[4 * N + n] + a[13 * N + n] * b[10 * N + n] + a[14 * N + n] * b[16 * N + n] +
837  a[15 * N + n] * b[22 * N + n] + a[16 * N + n] * b[28 * N + n] + a[17 * N + n] * b[34 * N + n];
838  c[17 * N + n] = a[12 * N + n] * b[5 * N + n] + a[13 * N + n] * b[11 * N + n] + a[14 * N + n] * b[17 * N + n] +
839  a[15 * N + n] * b[23 * N + n] + a[16 * N + n] * b[29 * N + n] + a[17 * N + n] * b[35 * N + n];
840  c[18 * N + n] = a[18 * N + n] * b[0 * N + n] + a[19 * N + n] * b[6 * N + n] + a[20 * N + n] * b[12 * N + n] +
841  a[21 * N + n] * b[18 * N + n] + a[22 * N + n] * b[24 * N + n] + a[23 * N + n] * b[30 * N + n];
842  c[19 * N + n] = a[18 * N + n] * b[1 * N + n] + a[19 * N + n] * b[7 * N + n] + a[20 * N + n] * b[13 * N + n] +
843  a[21 * N + n] * b[19 * N + n] + a[22 * N + n] * b[25 * N + n] + a[23 * N + n] * b[31 * N + n];
844  c[20 * N + n] = a[18 * N + n] * b[2 * N + n] + a[19 * N + n] * b[8 * N + n] + a[20 * N + n] * b[14 * N + n] +
845  a[21 * N + n] * b[20 * N + n] + a[22 * N + n] * b[26 * N + n] + a[23 * N + n] * b[32 * N + n];
846  c[21 * N + n] = a[18 * N + n] * b[3 * N + n] + a[19 * N + n] * b[9 * N + n] + a[20 * N + n] * b[15 * N + n] +
847  a[21 * N + n] * b[21 * N + n] + a[22 * N + n] * b[27 * N + n] + a[23 * N + n] * b[33 * N + n];
848  c[22 * N + n] = a[18 * N + n] * b[4 * N + n] + a[19 * N + n] * b[10 * N + n] + a[20 * N + n] * b[16 * N + n] +
849  a[21 * N + n] * b[22 * N + n] + a[22 * N + n] * b[28 * N + n] + a[23 * N + n] * b[34 * N + n];
850  c[23 * N + n] = a[18 * N + n] * b[5 * N + n] + a[19 * N + n] * b[11 * N + n] + a[20 * N + n] * b[17 * N + n] +
851  a[21 * N + n] * b[23 * N + n] + a[22 * N + n] * b[29 * N + n] + a[23 * N + n] * b[35 * N + n];
852  c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[6 * N + n] + a[26 * N + n] * b[12 * N + n] +
853  a[27 * N + n] * b[18 * N + n] + a[28 * N + n] * b[24 * N + n] + a[29 * N + n] * b[30 * N + n];
854  c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[13 * N + n] +
855  a[27 * N + n] * b[19 * N + n] + a[28 * N + n] * b[25 * N + n] + a[29 * N + n] * b[31 * N + n];
856  c[26 * N + n] = a[24 * N + n] * b[2 * N + n] + a[25 * N + n] * b[8 * N + n] + a[26 * N + n] * b[14 * N + n] +
857  a[27 * N + n] * b[20 * N + n] + a[28 * N + n] * b[26 * N + n] + a[29 * N + n] * b[32 * N + n];
858  c[27 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[9 * N + n] + a[26 * N + n] * b[15 * N + n] +
859  a[27 * N + n] * b[21 * N + n] + a[28 * N + n] * b[27 * N + n] + a[29 * N + n] * b[33 * N + n];
860  c[28 * N + n] = a[24 * N + n] * b[4 * N + n] + a[25 * N + n] * b[10 * N + n] + a[26 * N + n] * b[16 * N + n] +
861  a[27 * N + n] * b[22 * N + n] + a[28 * N + n] * b[28 * N + n] + a[29 * N + n] * b[34 * N + n];
862  c[29 * N + n] = a[24 * N + n] * b[5 * N + n] + a[25 * N + n] * b[11 * N + n] + a[26 * N + n] * b[17 * N + n] +
863  a[27 * N + n] * b[23 * N + n] + a[28 * N + n] * b[29 * N + n] + a[29 * N + n] * b[35 * N + n];
864  c[30 * N + n] = a[30 * N + n] * b[0 * N + n] + a[31 * N + n] * b[6 * N + n] + a[32 * N + n] * b[12 * N + n] +
865  a[33 * N + n] * b[18 * N + n] + a[34 * N + n] * b[24 * N + n] + a[35 * N + n] * b[30 * N + n];
866  c[31 * N + n] = a[30 * N + n] * b[1 * N + n] + a[31 * N + n] * b[7 * N + n] + a[32 * N + n] * b[13 * N + n] +
867  a[33 * N + n] * b[19 * N + n] + a[34 * N + n] * b[25 * N + n] + a[35 * N + n] * b[31 * N + n];
868  c[32 * N + n] = a[30 * N + n] * b[2 * N + n] + a[31 * N + n] * b[8 * N + n] + a[32 * N + n] * b[14 * N + n] +
869  a[33 * N + n] * b[20 * N + n] + a[34 * N + n] * b[26 * N + n] + a[35 * N + n] * b[32 * N + n];
870  c[33 * N + n] = a[30 * N + n] * b[3 * N + n] + a[31 * N + n] * b[9 * N + n] + a[32 * N + n] * b[15 * N + n] +
871  a[33 * N + n] * b[21 * N + n] + a[34 * N + n] * b[27 * N + n] + a[35 * N + n] * b[33 * N + n];
872  c[34 * N + n] = a[30 * N + n] * b[4 * N + n] + a[31 * N + n] * b[10 * N + n] + a[32 * N + n] * b[16 * N + n] +
873  a[33 * N + n] * b[22 * N + n] + a[34 * N + n] * b[28 * N + n] + a[35 * N + n] * b[34 * N + n];
874  c[35 * N + n] = a[30 * N + n] * b[5 * N + n] + a[31 * N + n] * b[11 * N + n] + a[32 * N + n] * b[17 * N + n] +
875  a[33 * N + n] * b[23 * N + n] + a[34 * N + n] * b[29 * N + n] + a[35 * N + n] * b[35 * N + n];
876  }
877  }
878  };
879 
880  template <typename T, idx_t D, idx_t N>
882 #ifdef DEBUG
883  printf("Multipl %d %d\n", D, N);
884 #endif
885 
887  }
888 
889  //==============================================================================
890  // Cramer inversion
891  //==============================================================================
892 
893  template <typename T, idx_t D, idx_t N>
894  struct CramerInverter {
895  static void invert(MPlex<T, D, D, N>& A, double* determ = nullptr) {
896  throw std::runtime_error("general cramer inversion not supported");
897  }
898  };
899 
900  template <typename T, idx_t N>
901  struct CramerInverter<T, 2, N> {
902  static void invert(MPlex<T, 2, 2, N>& A, double* determ = nullptr) {
903  typedef T TT;
904 
905  T* a = A.fArray;
906  ASSUME_ALIGNED(a, 64);
907 
908 #pragma omp simd
909  for (idx_t n = 0; n < N; ++n) {
910  // Force determinant calculation in double precision.
911  const double det = (double)a[0 * N + n] * a[3 * N + n] - (double)a[2 * N + n] * a[1 * N + n];
912  if (determ)
913  determ[n] = det;
914 
915  const TT s = TT(1) / det;
916  const TT tmp = s * a[3 * N + n];
917  a[1 * N + n] *= -s;
918  a[2 * N + n] *= -s;
919  a[3 * N + n] = s * a[0 * N + n];
920  a[0 * N + n] = tmp;
921  }
922  }
923  };
924 
925  template <typename T, idx_t N>
926  struct CramerInverter<T, 3, N> {
927  static void invert(MPlex<T, 3, 3, N>& A, double* determ = nullptr) {
928  typedef T TT;
929 
930  T* a = A.fArray;
931  ASSUME_ALIGNED(a, 64);
932 
933 #pragma omp simd
934  for (idx_t n = 0; n < N; ++n) {
935  const TT c00 = a[4 * N + n] * a[8 * N + n] - a[5 * N + n] * a[7 * N + n];
936  const TT c01 = a[5 * N + n] * a[6 * N + n] - a[3 * N + n] * a[8 * N + n];
937  const TT c02 = a[3 * N + n] * a[7 * N + n] - a[4 * N + n] * a[6 * N + n];
938  const TT c10 = a[7 * N + n] * a[2 * N + n] - a[8 * N + n] * a[1 * N + n];
939  const TT c11 = a[8 * N + n] * a[0 * N + n] - a[6 * N + n] * a[2 * N + n];
940  const TT c12 = a[6 * N + n] * a[1 * N + n] - a[7 * N + n] * a[0 * N + n];
941  const TT c20 = a[1 * N + n] * a[5 * N + n] - a[2 * N + n] * a[4 * N + n];
942  const TT c21 = a[2 * N + n] * a[3 * N + n] - a[0 * N + n] * a[5 * N + n];
943  const TT c22 = a[0 * N + n] * a[4 * N + n] - a[1 * N + n] * a[3 * N + n];
944 
945  // Force determinant calculation in double precision.
946  const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[2 * N + n] * c02;
947  if (determ)
948  determ[n] = det;
949 
950  const TT s = TT(1) / det;
951  a[0 * N + n] = s * c00;
952  a[1 * N + n] = s * c10;
953  a[2 * N + n] = s * c20;
954  a[3 * N + n] = s * c01;
955  a[4 * N + n] = s * c11;
956  a[5 * N + n] = s * c21;
957  a[6 * N + n] = s * c02;
958  a[7 * N + n] = s * c12;
959  a[8 * N + n] = s * c22;
960  }
961  }
962  };
963 
964  template <typename T, idx_t D, idx_t N>
965  void invertCramer(MPlex<T, D, D, N>& A, double* determ = nullptr) {
967  }
968 
969  //==============================================================================
970  // Cholesky inversion
971  //==============================================================================
972 
973  template <typename T, idx_t D, idx_t N>
975  static void invert(MPlex<T, D, D, N>& A) { throw std::runtime_error("general cholesky inversion not supported"); }
976  };
977 
978  template <typename T, idx_t N>
979  struct CholeskyInverter<T, 3, N> {
980  // Note: this only works on symmetric matrices.
981  // Optimized version for positive definite matrices, no checks.
982  static void invert(MPlex<T, 3, 3, N>& A) {
983  typedef T TT;
984 
985  T* a = A.fArray;
986  ASSUME_ALIGNED(a, 64);
987 
988 #pragma omp simd
989  for (idx_t n = 0; n < N; ++n) {
990  TT l0 = std::sqrt(T(1) / a[0 * N + n]);
991  TT l1 = a[3 * N + n] * l0;
992  TT l2 = a[4 * N + n] - l1 * l1;
993  l2 = std::sqrt(T(1) / l2);
994  TT l3 = a[6 * N + n] * l0;
995  TT l4 = (a[7 * N + n] - l1 * l3) * l2;
996  TT l5 = a[8 * N + n] - (l3 * l3 + l4 * l4);
997  l5 = std::sqrt(T(1) / l5);
998 
999  // decomposition done
1000 
1001  l3 = (l1 * l4 * l2 - l3) * l0 * l5;
1002  l1 = -l1 * l0 * l2;
1003  l4 = -l4 * l2 * l5;
1004 
1005  a[0 * N + n] = l3 * l3 + l1 * l1 + l0 * l0;
1006  a[1 * N + n] = a[3 * N + n] = l3 * l4 + l1 * l2;
1007  a[4 * N + n] = l4 * l4 + l2 * l2;
1008  a[2 * N + n] = a[6 * N + n] = l3 * l5;
1009  a[5 * N + n] = a[7 * N + n] = l4 * l5;
1010  a[8 * N + n] = l5 * l5;
1011 
1012  // m(2,x) are all zero if anything went wrong at l5.
1013  // all zero, if anything went wrong already for l0 or l2.
1014  }
1015  }
1016  };
1017 
1018  template <typename T, idx_t D, idx_t N>
1021  }
1022 
1023 } // namespace Matriplex
1024 
1025 #endif
Basic3DVector & operator*=(T t)
Scaling by a scalar value (multiplication)
void sincos4(const MPlex< T, D1, D2, N > &a, MPlex< T, D1, D2, N > &s, MPlex< T, D1, D2, N > &c)
Definition: Matriplex.h:696
Divides< B, C > D2
Definition: Factorize.h:137
void sincos4(const T x, T &sin, T &cos)
Basic3DVector & operator=(const Basic3DVector &)=default
Assignment operator.
MPlex< T, D1, D2, N > negate(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:497
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:616
Definition: APVGainStruct.h:7
Matriplex< T, D1, D2, N > MPlex
Definition: Matriplex.h:483
MPlex< T, D1, D2, N > operator+(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:511
MPlex< T, D1, D2, N > min(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:714
void multiply(const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
Definition: Matriplex.h:881
MPlex< T, D1, D2, N > sin(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:622
static void invert(MPlex< T, D, D, N > &A, double *determ=nullptr)
Definition: Matriplex.h:895
MPlex< T, D1, D2, N > operator-(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:490
MPlex< T, D1, D2, N > operator/(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:532
Basic3DVector & operator-=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > sqr(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:601
static void multiply(const MPlex< T, 3, 3, N > &A, const MPlex< T, 3, 3, N > &B, MPlex< T, 3, 3, N > &C)
Definition: Matriplex.h:770
static void multiply(const MPlex< T, 6, 6, N > &A, const MPlex< T, 6, 6, N > &B, MPlex< T, 6, 6, N > &C)
Definition: Matriplex.h:795
static void invert(MPlex< T, 2, 2, N > &A, double *determ=nullptr)
Definition: Matriplex.h:902
void sincos(const MPlex< T, D1, D2, N > &a, MPlex< T, D1, D2, N > &s, MPlex< T, D1, D2, N > &c)
Definition: Matriplex.h:634
Divides< A, C > D1
Definition: Factorize.h:136
#define MPLEX_ALIGN
MPF fast_tan(const MPF &a)
MPlex< T, D1, D2, N > abs(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:595
static void invert(MPlex< T, 3, 3, N > &A, double *determ=nullptr)
Definition: Matriplex.h:927
MPlex< T, D1, D2, N > max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:723
void min_max(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b, MPlex< T, D1, D2, N > &min, MPlex< T, D1, D2, N > &max)
Definition: Matriplex.h:703
MPlex< T, D1, D2, N > negate_if_ltz(const MPlex< T, D1, D2, N > &a, const MPlex< TT, D1, D2, N > &sign)
Definition: Matriplex.h:504
static void invert(MPlex< T, 3, 3, N > &A)
Definition: Matriplex.h:982
void multiplyGeneral(const MPlex< T, D1, D2, N > &A, const MPlex< T, D2, D3, N > &B, MPlex< T, D1, D3, N > &C)
Definition: Matriplex.h:736
MPlex< T, D1, D2, N > operator*(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:525
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
void invertCramer(MPlex< T, D, D, N > &A, double *determ=nullptr)
Definition: Matriplex.h:965
Basic3DVector & operator/=(T t)
Scaling by a scalar value (division)
MPlex< T, D1, D2, N > tan(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:642
double b
Definition: hdecay.h:120
class __attribute__((aligned(32))) Matriplex
Definition: Matriplex.h:35
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
double a
Definition: hdecay.h:121
void invertCholesky(MPlex< T, D, D, N > &A)
Definition: Matriplex.h:1019
float x
Definition: APVGainStruct.h:7
static void invert(MPlex< T, D, D, N > &A)
Definition: Matriplex.h:975
MPF fast_atan2(const MPF &y, const MPF &x)
static void multiply(const MPlex< T, D, D, N > &A, const MPlex< T, D, D, N > &B, MPlex< T, D, D, N > &C)
Definition: Matriplex.h:763
void fast_sincos(const MPF &a, MPF &s, MPF &c)
T operator[](int i) const
tmp
align.sh
Definition: createJobs.py:716
long double T
#define ASSUME_ALIGNED(a, b)
Basic3DVector & operator+=(const Basic3DVector< U > &p)
MPlex< T, D1, D2, N > cos(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:628
MPlex< T, D1, D2, N > sqrt(const MPlex< T, D1, D2, N > &a)
Definition: Matriplex.h:610
MPlex< T, D1, D2, N > atan2(const MPlex< T, D1, D2, N > &y, const MPlex< T, D1, D2, N > &x)
Definition: Matriplex.h:648