CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_2_9/src/DataFormats/Math/interface/AVXVec.h

Go to the documentation of this file.
00001 #ifndef DataFormat_Math_AVXVec_H
00002 #define DataFormat_Math_AVXVec_H
00003 
00004 // In principle this header should not be used on its own;
00005 // it is meant to be included only as part of SSEVec.
00006 namespace mathSSE {
00007 
00008   template<>
00009   union Vec4<double> {  // AVX specialization: four doubles seen as a register, an array, or an OldVec
00010     typedef  __m256d nativeType;
00011     __m256d vec;                                   // native 256-bit register view
00012     double __attribute__ ((aligned(32))) arr[4];   // per-element view, 32-byte aligned
00013     OldVec<double> o;                              // legacy view (OldVec is declared elsewhere)
00014     // Wrap a native register; intentionally implicit, the free operators return __m256d.
00015     Vec4(__m256d ivec) : vec(ivec) {}
00016     // Initialize through the legacy OldVec view.
00017     Vec4(OldVec<double> const & ivec) : o(ivec) {}
00018     // Default construction zeroes all four elements.
00019     Vec4() {
00020       vec = _mm256_setzero_pd();
00021     }
00022 
00023     // Widen a single-precision Vec4 to double precision.
00024     inline Vec4(Vec4<float> ivec) {
00025       vec = _mm256_cvtps_pd(ivec.vec);
00026     }
00027     // Broadcast one value to all four elements.
00028     explicit Vec4(double f1) {
00029       set1(f1);
00030     }
00031     // Element-wise construction; the fourth element defaults to 0.
00032     Vec4(double f1, double f2, double f3, double f4=0) {
00033       arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00034     }
00035 
00036     // Concatenate two Vec2: ivec0 fills elements 0-1, ivec1 fills elements 2-3.
00037     Vec4( Vec2<double> ivec0,   Vec2<double> ivec1) {
00038       // Start from ivec0 rather than reading the not-yet-initialized member `vec`.
00039       vec = _mm256_insertf128_pd(_mm256_castpd128_pd256(ivec0.vec),ivec1.vec,1);
00040 
00041     }
00042     // ivec0 fills elements 0-1; f3 and f4 (default 0) fill elements 2-3.
00043     Vec4( Vec2<double> ivec0,  double f3, double f4=0) {
00044       // _mm_set_pd takes the high element first, so f3 ends up in element 2 and f4 in element 3.
00045       vec = _mm256_insertf128_pd(_mm256_castpd128_pd256(ivec0.vec),_mm_set_pd(f4,f3),1);
00046     }
00047     // ivec0 fills elements 0-1; elements 2-3 are zero.
00048     Vec4( Vec2<double> ivec0) {
00049       vec = _mm256_setzero_pd();
00050       vec = _mm256_insertf128_pd(vec,ivec0.vec,0);
00051     }
00052 
00053     // Overwrite this vector with the register held by a Mask4<double> built from
00054     // m1..m4 (Mask4 is declared elsewhere); used for masking.
00055     void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
00056       Mask4<double> mask(m1,m2,m3,m4); vec=mask.vec; 
00057     }
00058     // Assign all four elements; _mm256_set_pd takes the highest element first, hence the reversed order.
00059     void set(double f1, double f2, double f3, double f4=0) {
00060       vec = _mm256_set_pd(f4, f3, f2, f1);
00061     }
00062     // Broadcast f1 to all four elements.
00063     void set1(double f1) {
00064      vec =  _mm256_set1_pd(f1);
00065     }
00066     // Return a vector with element N replicated in every position (goes through the array view).
00067     template<int N>
00068     Vec4 get1() const {
00069       return _mm256_set1_pd(arr[N]); //FIXME
00070     }
00071     /*
00072     Vec4 get1(unsigned int n) const { 
00073       return _mm256_set1_pd(arr[n]); //FIXME
00074     }
00075     */
00076     double & operator[](unsigned int n) {
00077       return arr[n];
00078     }
00079     // Element access; n is not range-checked in either overload.
00080     double operator[](unsigned int n) const {
00081       return arr[n];
00082     }
00083     // xy(): elements 0-1 (lower 128 bits); zw(): elements 2-3 (upper 128 bits).
00084     Vec2<double> xy() const { return  Vec2<double>(_mm256_castpd256_pd128(vec));}
00085     Vec2<double> zw() const { return  Vec2<double>(_mm256_extractf128_pd(vec,1));}
00086 
00087   };
00088   
00089   inline Vec4<float>::Vec4(Vec4<double> ivec) {
00090     const __m128 narrowed = _mm256_cvtpd_ps(ivec.vec); vec = narrowed;  // four doubles -> four floats
00091   }
00092 } // namespace mathSSE
00093 
00094 
00095 
00096 inline bool operator==(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00097   return 0xf == _mm256_movemask_pd(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS));  // all four lane bits set
00098 }
00099   
00100 inline mathSSE::Vec4<double> cmpeq(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00101   return mathSSE::Vec4<double>(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS));  // per-lane a == b mask
00102 }
00103 
00104 inline mathSSE::Vec4<double> cmpgt(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00105   return mathSSE::Vec4<double>(_mm256_cmp_pd(a.vec, b.vec, _CMP_GT_OS));  // per-lane a > b mask
00106 }
00107 
00108 inline mathSSE::Vec4<double> hadd(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00109   return mathSSE::Vec4<double>(_mm256_hadd_pd(a.vec, b.vec));  // { a0+a1, b0+b1, a2+a3, b2+b3 }
00110 }
00111 
00112 
00113 
00114 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a) {
00115   const __m256d signBits = _mm256_set1_pd(-0.0);  // -0.0 has only the sign bit set in each lane
00116   return mathSSE::Vec4<double>(_mm256_xor_pd(a.vec, signBits));  // XOR flips every sign
00117 }
00118 
00119 inline mathSSE::Vec4<double> operator&(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00120   return mathSSE::Vec4<double>(_mm256_and_pd(a.vec, b.vec));  // bitwise AND of the raw lane bits
00121 }
00122 inline mathSSE::Vec4<double> operator|(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00123   return mathSSE::Vec4<double>(_mm256_or_pd(a.vec, b.vec));  // bitwise OR of the raw lane bits
00124 }
00125 inline mathSSE::Vec4<double> operator^(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00126   return mathSSE::Vec4<double>(_mm256_xor_pd(a.vec, b.vec));  // bitwise XOR of the raw lane bits
00127 }
00128 inline mathSSE::Vec4<double> andnot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00129   return mathSSE::Vec4<double>(_mm256_andnot_pd(a.vec, b.vec));  // (NOT a) AND b, bitwise
00130 }
00131 
00132 
00133 inline mathSSE::Vec4<double> operator+(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00134   return mathSSE::Vec4<double>(_mm256_add_pd(a.vec, b.vec));  // element-wise a + b
00135 }
00136 
00137 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00138   return mathSSE::Vec4<double>(_mm256_sub_pd(a.vec, b.vec));  // element-wise a - b
00139 }
00140 
00141 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00142   return mathSSE::Vec4<double>(_mm256_mul_pd(a.vec, b.vec));  // element-wise a * b
00143 }
00144 
00145 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00146   return mathSSE::Vec4<double>(_mm256_div_pd(a.vec, b.vec));  // element-wise a / b
00147 }
00148 
00149 inline mathSSE::Vec4<double> operator*(double a, mathSSE::Vec4<double> b) {
00150   return mathSSE::Vec4<double>(a) * b;  // broadcast the scalar, then multiply element-wise
00151 }
00152 
00153 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> b,double a) {
00154   return mathSSE::Vec4<double>(a) * b;  // broadcast scalar stays the first operand, as before
00155 }
00156 
00157 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> b,double a) {
00158   return b / mathSSE::Vec4<double>(a);  // divide every element of b by the broadcast scalar
00159 }
00160 
00161 
00162 inline double
00163 __attribute__((always_inline)) __attribute__ ((pure))
00164 dot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00165   // _mm256_dot_pd is a mathSSE helper declared elsewhere; presumably it leaves the
00166   // dot product in element 0, the only element read here (TODO confirm).
00167   const mathSSE::Vec4<double> ret(mathSSE::_mm256_dot_pd(a.vec, b.vec));
00168   return ret.arr[0];
00169 }
00170 
00171 inline mathSSE::Vec4<double>
00172 __attribute__((always_inline)) __attribute__ ((pure))
00173 cross(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00174   // Delegates to the mathSSE helper (declared elsewhere), passing the raw registers.
00175   return mathSSE::Vec4<double>(mathSSE::_mm256_cross_pd(a.vec, b.vec));
00176 }
00177 
00178 inline double
00179 __attribute__((always_inline)) __attribute__ ((pure))
00180 dotxy(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
00181   const mathSSE::Vec4<double> prod = a*b;              // element-wise products
00182   const mathSSE::Vec4<double> sums = hadd(prod,prod);  // element 0 becomes prod[0] + prod[1]
00183   return sums.arr[0];
00184 }
00185 
00186 
00187 #endif