CMS 3D CMS Logo

AVXVec.h
Go to the documentation of this file.
1 #ifndef DataFormat_Math_AVXVec_H
2 #define DataFormat_Math_AVXVec_H
3 
4 // In principle this header should not be used on its own,
5 // only as part of SSEVec.
6 namespace mathSSE {
7 
8  template <>
9  union Vec4<double> {
10  typedef __m256d nativeType;
11  __m256d vec;
12  double __attribute__((aligned(32))) arr[4];
13  OldVec<double> o;
14 
15  Vec4(__m256d ivec) : vec(ivec) {}
16 
17  Vec4(OldVec<double> const& ivec) : o(ivec) {}
18 
19  Vec4() { vec = _mm256_setzero_pd(); }
20 
21  inline Vec4(Vec4<float> ivec) { vec = _mm256_cvtps_pd(ivec.vec); }
22 
23  explicit Vec4(double f1) { set1(f1); }
24 
25  Vec4(double f1, double f2, double f3, double f4 = 0) {
26  arr[0] = f1;
27  arr[1] = f2;
28  arr[2] = f3;
29  arr[3] = f4;
30  }
31 
32  Vec4(Vec2<double> ivec0, Vec2<double> ivec1) {
33  vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
34  vec = _mm256_insertf128_pd(vec, ivec1.vec, 1);
35  }
36 
37  Vec4(Vec2<double> ivec0, double f3, double f4 = 0) {
38  vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
39  arr[2] = f3;
40  arr[3] = f4;
41  }
42 
43  Vec4(Vec2<double> ivec0) {
44  vec = _mm256_setzero_pd();
45  vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
46  }
47 
48  // for masking
49  void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
50  Mask4<double> mask(m1, m2, m3, m4);
51  vec = mask.vec;
52  }
53 
54  void set(double f1, double f2, double f3, double f4 = 0) { vec = _mm256_set_pd(f4, f3, f2, f1); }
55 
56  void set1(double f1) { vec = _mm256_set1_pd(f1); }
57 
58  template <int N>
59  Vec4 get1() const {
60  return _mm256_set1_pd(arr[N]); //FIXME
61  }
62  /*
63  Vec4 get1(unsigned int n) const {
64  return _mm256_set1_pd(arr[n]); //FIXME
65  }
66  */
67  double& operator[](unsigned int n) { return arr[n]; }
68 
69  double operator[](unsigned int n) const { return arr[n]; }
70 
71  Vec2<double> xy() const { return Vec2<double>(_mm256_castpd256_pd128(vec)); }
72  Vec2<double> zw() const { return Vec2<double>(_mm256_castpd256_pd128(_mm256_permute2f128_pd(vec, vec, 1))); }
73  };
74 
75  inline Vec4<float>::Vec4(Vec4<double> ivec) { vec = _mm256_cvtpd_ps(ivec.vec); }
76 } // namespace mathSSE
77 
79  return _mm256_movemask_pd(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS)) == 0xf;
80 }
81 
83  return _mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS);
84 }
85 
87  return _mm256_cmp_pd(a.vec, b.vec, _CMP_GT_OS);
88 }
89 
91  return _mm256_hadd_pd(a.vec, b.vec);
92 }
93 
95  const __m256d neg = _mm256_set_pd(-0.0, -0.0, -0.0, -0.0);
96  return _mm256_xor_pd(a.vec, neg);
97 }
98 
100  return _mm256_and_pd(a.vec, b.vec);
101 }
103  return _mm256_or_pd(a.vec, b.vec);
104 }
106  return _mm256_xor_pd(a.vec, b.vec);
107 }
109  return _mm256_andnot_pd(a.vec, b.vec);
110 }
111 
113  return _mm256_add_pd(a.vec, b.vec);
114 }
115 
117  return _mm256_sub_pd(a.vec, b.vec);
118 }
119 
121  return _mm256_mul_pd(a.vec, b.vec);
122 }
123 
125  return _mm256_div_pd(a.vec, b.vec);
126 }
127 
129  return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
130 }
131 
133  return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
134 }
135 
137  return _mm256_div_pd(b.vec, _mm256_set1_pd(a));
138 }
139 
140 inline double __attribute__((always_inline)) __attribute__((pure))
142  using mathSSE::_mm256_dot_pd;
144  ret.vec = _mm256_dot_pd(a.vec, b.vec);
145  return ret.arr[0];
146 }
147 
148 inline mathSSE::Vec4<double> __attribute__((always_inline)) __attribute__((pure))
150  using mathSSE::_mm256_cross_pd;
151  return _mm256_cross_pd(a.vec, b.vec);
152 }
153 
154 inline double __attribute__((always_inline)) __attribute__((pure))
156  mathSSE::Vec4<double> mul = a * b;
157  mul = hadd(mul, mul);
158  return mul.arr[0];
159 }
160 
161 #endif
Definition: AVXVec.h:6
void set1(float f1)
Definition: SSEVec.h:184
mathSSE::Vec4< double > andnot(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:108
Basic3DVector cross(const Basic3DVector &v) const
Vector product, or "cross" product, with a vector of same type.
mathSSE::Vec4< double > operator&(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:99
mathSSE::Vec4< double > operator|(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:102
Vec4(Vec4< float > ivec)
Definition: AVXVec.h:21
bool operator==(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:78
ret
prodAgent to be discontinued
void set1(double f1)
Definition: AVXVec.h:56
Vec2< double > xy() const
Definition: AVXVec.h:71
mathSSE::Vec4< double > cmpgt(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:86
OldVec< T > o
Definition: SSEVec.h:204
Vec2< double > zw() const
Definition: AVXVec.h:72
mathSSE::Vec4< double > operator/(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:124
constexpr uint32_t mask
Definition: gpuClustering.h:26
T dot(const Basic3DVector &v) const
Scalar product, or "dot" product, with a vector of same type.
mathSSE::Vec4< double > operator-(mathSSE::Vec4< double > a)
Definition: AVXVec.h:94
Vec4(double f1)
Definition: AVXVec.h:23
mathSSE::Vec4< double > operator*(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:120
double operator[](unsigned int n) const
Definition: AVXVec.h:69
mathSSE::Vec4< double > operator+(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:112
ExtVec< T, 4 > Vec4
Definition: ExtVec.h:60
Vec4(double f1, double f2, double f3, double f4=0)
Definition: AVXVec.h:25
#define N
Definition: blowfish.cc:9
double __attribute__((always_inline)) __attribute__((pure)) dot(mathSSE
Definition: AVXVec.h:140
double b
Definition: hdecay.h:120
Vec4(Vec2< double > ivec0, Vec2< double > ivec1)
Definition: AVXVec.h:32
double a
Definition: hdecay.h:121
Vec4(OldVec< double > const &ivec)
Definition: AVXVec.h:17
mathSSE::Vec4< double > operator^(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:105
Vec4(Vec2< double > ivec0, double f3, double f4=0)
Definition: AVXVec.h:37
Vec4(Vec2< double > ivec0)
Definition: AVXVec.h:43
void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4)
Definition: AVXVec.h:49
Vec4 get1() const
Definition: AVXVec.h:59
mathSSE::Vec4< double > hadd(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:90
mathSSE::Vec4< double > cmpeq(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:82
double & operator[](unsigned int n)
Definition: AVXVec.h:67
T __attribute__((aligned(16))) arr[4]