CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
AVXVec.h
Go to the documentation of this file.
1 #ifndef DataFormat_Math_AVXVec_H
2 #define DataFormat_Math_AVXVec_H
3 
4 // in principle it should not be used alone
5 // only as part of SSEVec
6 namespace mathSSE {
7 
8  template<>
9  union Vec4<double> {
10  typedef __m256d nativeType;
11  __m256d vec;
12  double __attribute__ ((aligned(32))) arr[4];
13  OldVec<double> o;
14 
15  Vec4(__m256d ivec) : vec(ivec) {}
16 
17  Vec4(OldVec<double> const & ivec) : o(ivec) {}
18 
19  Vec4() {
20  vec = _mm256_setzero_pd();
21  }
22 
23 
24  inline Vec4(Vec4<float> ivec) {
25  vec = _mm256_cvtps_pd(ivec.vec);
26  }
27 
28  explicit Vec4(double f1) {
29  set1(f1);
30  }
31 
32  Vec4(double f1, double f2, double f3, double f4=0) {
33  arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
34  }
35 
36 
37  Vec4( Vec2<double> ivec0, Vec2<double> ivec1) {
38  vec = _mm256_insertf128_pd(vec,ivec0.vec,0);
39  vec = _mm256_insertf128_pd(vec,ivec1.vec,1);
40 
41  }
42 
43  Vec4( Vec2<double> ivec0, double f3, double f4=0) {
44  vec = _mm256_insertf128_pd(vec,ivec0.vec,0);
45  arr[2] = f3; arr[3] = f4;
46  }
47 
48  Vec4( Vec2<double> ivec0) {
49  vec = _mm256_setzero_pd();
50  vec = _mm256_insertf128_pd(vec,ivec0.vec,0);
51  }
52 
53 
54  // for masking
55  void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
56  Mask4<double> mask(m1,m2,m3,m4); vec=mask.vec;
57  }
58 
59  void set(double f1, double f2, double f3, double f4=0) {
60  vec = _mm256_set_pd(f4, f3, f2, f1);
61  }
62 
63  void set1(double f1) {
64  vec = _mm256_set1_pd(f1);
65  }
66 
67  template<int N>
68  Vec4 get1() const {
69  return _mm256_set1_pd(arr[N]); //FIXME
70  }
71  /*
72  Vec4 get1(unsigned int n) const {
73  return _mm256_set1_pd(arr[n]); //FIXME
74  }
75  */
76  double & operator[](unsigned int n) {
77  return arr[n];
78  }
79 
80  double operator[](unsigned int n) const {
81  return arr[n];
82  }
83 
84  Vec2<double> xy() const { return Vec2<double>(_mm256_castpd256_pd128(vec));}
85  Vec2<double> zw() const { return Vec2<double>(_mm256_castpd256_pd128(_mm256_permute2f128_pd(vec,vec,1)));}
86 
87  };
88 
89  inline Vec4<float>::Vec4(Vec4<double> ivec) {
90  vec = _mm256_cvtpd_ps(ivec.vec);
91  }
92 } // namespace mathSSE
93 
94 
95 
97  return _mm256_movemask_pd(_mm256_cmp_pd(a.vec,b.vec,_CMP_EQ_OS))==0xf;
98 }
99 
101  return _mm256_cmp_pd(a.vec,b.vec,_CMP_EQ_OS);
102 }
103 
105  return _mm256_cmp_pd(a.vec,b.vec,_CMP_GT_OS);
106 }
107 
109  return _mm256_hadd_pd(a.vec,b.vec);
110 }
111 
112 
113 
115  const __m256d neg = _mm256_set_pd ( -0.0 , -0.0 , -0.0, -0.0);
116  return _mm256_xor_pd(a.vec,neg);
117 }
118 
120  return _mm256_and_pd(a.vec,b.vec);
121 }
123  return _mm256_or_pd(a.vec,b.vec);
124 }
126  return _mm256_xor_pd(a.vec,b.vec);
127 }
129  return _mm256_andnot_pd(a.vec,b.vec);
130 }
131 
132 
134  return _mm256_add_pd(a.vec,b.vec);
135 }
136 
138  return _mm256_sub_pd(a.vec,b.vec);
139 }
140 
142  return _mm256_mul_pd(a.vec,b.vec);
143 }
144 
146  return _mm256_div_pd(a.vec,b.vec);
147 }
148 
150  return _mm256_mul_pd(_mm256_set1_pd(a),b.vec);
151 }
152 
154  return _mm256_mul_pd(_mm256_set1_pd(a),b.vec);
155 }
156 
158  return _mm256_div_pd(b.vec,_mm256_set1_pd(a));
159 }
160 
161 
162 inline double
163 __attribute__((always_inline)) __attribute__ ((pure))
164 dot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
165  using mathSSE::_mm256_dot_pd;
167  ret.vec = _mm256_dot_pd(a.vec,b.vec);
168  return ret.arr[0];
169 }
170 
171 inline mathSSE::Vec4<double>
172 __attribute__((always_inline)) __attribute__ ((pure))
173 cross(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
174  using mathSSE::_mm256_cross_pd;
175  return _mm256_cross_pd(a.vec,b.vec);
176 }
177 
178 inline double
179 __attribute__((always_inline)) __attribute__ ((pure))
180 dotxy(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
181  mathSSE::Vec4<double> mul = a*b;
182  mul = hadd(mul,mul);
183  return mul.arr[0];
184 }
185 
186 
187 #endif
void set1(float f1)
Definition: SSEVec.h:205
mathSSE::Vec4< double > andnot(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:128
mathSSE::Vec4< double > operator|(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:122
Vec4(Vec4< float > ivec)
Definition: AVXVec.h:24
Vec2< double > xy() const
Definition: AVXVec.h:84
double operator[](unsigned int n) const
Definition: AVXVec.h:80
ExtVec< T, 4 > Vec4
Definition: ExtVec.h:23
MatrixMeschach operator+(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
void set1(double f1)
Definition: AVXVec.h:63
mathSSE::Vec4< double > operator&(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:119
MatrixMeschach operator-(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
bool operator==(const CaloTower &t1, const CaloTower &t2)
Definition: CaloTower.h:209
mathSSE::Vec4< double > cmpgt(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:104
void set(double f1, double f2, double f3, double f4=0)
Definition: AVXVec.h:59
OldVec< T > o
Definition: SSEVec.h:224
Basic3DVector< long double > operator/(const Basic3DVector< long double > &v, S s)
Vec4(double f1)
Definition: AVXVec.h:28
Vec2< double > zw() const
Definition: AVXVec.h:85
Vec4(double f1, double f2, double f3, double f4=0)
Definition: AVXVec.h:32
#define N
Definition: blowfish.cc:9
float __attribute__((vector_size(8))) float32x2_t
Definition: ExtVec.h:6
double b
Definition: hdecay.h:120
Vec4(Vec2< double > ivec0, Vec2< double > ivec1)
Definition: AVXVec.h:37
T dot(const Basic3DVector &v) const
Scalar product, or "dot" product, with a vector of same type.
double a
Definition: hdecay.h:121
Vec4(OldVec< double > const &ivec)
Definition: AVXVec.h:17
mathSSE::Vec4< double > operator^(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:125
Vec4(Vec2< double > ivec0, double f3, double f4=0)
Definition: AVXVec.h:43
Vec4(Vec2< double > ivec0)
Definition: AVXVec.h:48
MatrixMeschach operator*(const MatrixMeschach &mat1, const MatrixMeschach &mat2)
void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4)
Definition: AVXVec.h:55
Vec4 get1() const
Definition: AVXVec.h:68
mathSSE::Vec4< double > hadd(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:108
mathSSE::Vec4< double > cmpeq(mathSSE::Vec4< double > a, mathSSE::Vec4< double > b)
Definition: AVXVec.h:100
double & operator[](unsigned int n)
Definition: AVXVec.h:76
T __attribute__((aligned(16))) arr[4]
Basic3DVector cross(const Basic3DVector &v) const
Vector product, or "cross" product, with a vector of same type.