CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch9/src/DataFormats/Math/interface/SSEVec.h

Go to the documentation of this file.
00001 #ifndef DataFormat_Math_SSEVec_H
00002 #define DataFormat_Math_SSEVec_H
00003 
00004 #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ > 4)
00005 #include <x86intrin.h>
00006 #define CMS_USE_SSE
00007 
00008 #else
00009 
00010 #ifdef __SSE2__
00011 #define CMS_USE_SSE
00012 
00013 #include <mmintrin.h>
00014 #include <emmintrin.h>
00015 #endif
00016 #ifdef __SSE3__
00017 #include <pmmintrin.h>
00018 #endif
00019 #ifdef __SSE4_1__
00020 #include <smmintrin.h>
00021 #endif
00022 
00023 #endif
00024 
00025 #include<cmath>
00026 
namespace mathSSE {
  /// Generic square root: forwards to std::sqrt and converts the result back to T.
  /// Explicit specializations for the vector types are provided further down the file.
  template<typename T>
  inline T sqrt(T t) {
    T const root = std::sqrt(t);
    return root;
  }
}
00030 
namespace mathSSE {
  /// Returns true when rh and lh carry the same sign bit.
  /// Only declared here; the explicit specializations below implement it for
  /// int, long long, float and double.
  template<typename T> inline bool samesign(T rh, T lh);

  /// int: the xor of two integers is non-negative exactly when their sign bits agree.
  /// Written without a hard-coded 0x80000000 mask so it does not depend on the width of int.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<int>(int rh, int lh) {
    return (rh ^ lh) >= 0;
  }

  /// long long: same xor test as the int version.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<long long>(long long rh, long long lh) {
    return (rh ^ lh) >= 0;
  }

  /// float: compares the IEEE sign bits, so -0.0f counts as negative and NaNs
  /// are classified by their sign bit. std::signbit avoids punning through a union.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<float>(float rh, float lh) {
    return std::signbit(rh) == std::signbit(lh);
  }

  /// double: same sign-bit comparison as the float version.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<double>(double rh, double lh) {
    return std::signbit(rh) == std::signbit(lh);
  }
}
00065 
00066 
00067 namespace mathSSE {
00068 #ifdef  CMS_USE_SSE
00069   //dot
/// Dot product over all four lanes of v1 and v2.
/// Every code path leaves the scalar result replicated in all four lanes of the return value.
inline __m128 _mm_dot_ps(__m128 v1, __m128 v2) {
#ifdef __SSE4_1__
  // mask 0xff: use all four lanes as input and write the sum to all four lanes
  return _mm_dp_ps(v1, v2, 0xff);
#else
  __m128 prod = _mm_mul_ps(v1, v2);
#ifdef __SSE3__
  // two horizontal adds collapse the four products into one sum per lane
  prod = _mm_hadd_ps(prod, prod);
  return _mm_hadd_ps(prod, prod);
#else
  // add the products to themselves with the two halves exchanged ...
  __m128 rot = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(1, 0, 3, 2));
  prod = _mm_add_ps(prod, rot);
  // ... then with neighbouring lanes exchanged, which completes the sum in every lane
  rot = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(2, 3, 0, 1));
  return _mm_add_ps(prod, rot);
#endif
#endif
}
00086   
00087 
00088   // cross (just 3x3) 
/// 3D cross product of the three low lanes of v1 and v2.
/// The fourth lane of the result is v1.w*v2.w - v2.w*v1.w.
inline __m128 _mm_cross_ps(__m128 v1, __m128 v2) {
  // lanes: (v2.z*v1.y, v2.z*v1.x, v1.x*v2.y, v1.w*v2.w)
  __m128 const minuend = _mm_mul_ps(_mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 0, 2, 2)),
                                    _mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 1, 0, 1)));

  // lanes: (v1.z*v2.y, v1.z*v2.x, v2.x*v1.y, v2.w*v1.w)
  __m128 const subtrahend = _mm_mul_ps(_mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 0, 2, 2)),
                                       _mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 1, 0, 1)));

  // the difference has the wrong sign in lane 1 only; flip that sign bit
  __m128 const flipSecond = _mm_set_ps(0.0f, 0.0f, -0.0f, 0.0f);
  return _mm_xor_ps(_mm_sub_ps(minuend, subtrahend), flipSecond);
}
00102 
00103 
00104 #endif // CMS_USE_SSE
00105 
00106 
/// Plain four-field layout (theX, theY, theZ, theW), 16-byte aligned.
/// Vec4 overlays it on its component array as the member `o`.
template<typename T>
struct OldVec { T  theX; T  theY; T  theZ; T  theW;}  __attribute__ ((aligned (16)));


/// Generic (non-SSE) two-component vector; storage is a 16-byte aligned array.
template<typename T> union Vec2{
  /// Zero-initializes both components.
  Vec2() {
    arr[0] = 0; arr[1] = 0;
  }
  /// Component-wise construction.
  Vec2(T f1, T f2) {
    arr[0] = f1; arr[1] = f2;
  }
  /// Broadcasts f1 to both components.
  explicit Vec2(T f1) {
    set1(f1);
  }
  /// Overwrites both components.
  void set(T f1, T f2) {
    arr[0] = f1; arr[1] = f2;
  }
  /// Broadcasts f1 to both components (same interface as the Vec2<double> specialization).
  void set1(T f1) {
    arr[0] = f1; arr[1] = f1;
  }
  /// Returns a vector with component n replicated; n is not range-checked.
  Vec2 get1(unsigned int n) const {
    return Vec2(arr[n],arr[n]);
  }

  /// Unchecked mutable component access.
  T & operator[](unsigned int n) {
    return arr[n];
  }

  /// Unchecked read-only component access.
  T operator[](unsigned int n) const {
    return arr[n];
  }


  T __attribute__ ((aligned(16))) arr[2];
};


/// Generic (non-SSE) four-component vector; `arr` and the OldVec view `o` share storage.
/// All component parameters are of type T, so no precision is lost for element
/// types wider than float.
template<typename T> union Vec4{
  /// Zero-initializes all four components.
  Vec4() {
    arr[0] = 0; arr[1] = 0; arr[2] = 0; arr[3]=0;
  }
  /// Component-wise construction; the fourth component defaults to 0.
  Vec4(T f1, T f2, T f3, T f4=0) {
    arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
  }
  /// Broadcasts f1 to all four components.
  explicit Vec4(T f1) {
    set1(f1);
  }
  /// Overwrites all components; the fourth defaults to 0.
  void set(T f1, T f2, T f3, T f4=0) {
    arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
  }
  /// Broadcasts f1 to all four components.
  void set1(T f1) {
    arr[0] = f1; arr[1] = f1; arr[2] = f1; arr[3]=f1;
  }
  /// Returns a vector with component n replicated; n is not range-checked.
  Vec4 get1(unsigned int n) const {
    return Vec4(arr[n],arr[n],arr[n],arr[n]);
  }

  /// Unchecked mutable component access (matches the SSE specializations).
  T & operator[](unsigned int n) {
    return arr[n];
  }

  /// Unchecked read-only component access (matches the SSE specializations).
  T operator[](unsigned int n) const {
    return arr[n];
  }

  /// First two components as a Vec2.
  Vec2<T> xy() const { return  Vec2<T>(arr[0],arr[1]);}
  /// Last two components as a Vec2.
  Vec2<T> zw() const { return  Vec2<T>(arr[2],arr[3]);}



  T __attribute__ ((aligned(16))) arr[4];
  OldVec<T> o;
};
00169 
00170 
00171 #ifdef CMS_USE_SSE
00172 
00173   template<>
00174   union Vec4<float> {
00175     typedef  __m128 nativeType;
00176     __m128 vec;
00177     float __attribute__ ((aligned(16))) arr[4];
00178     OldVec<float> o;
00179     
00180     Vec4(__m128 ivec) : vec(ivec) {}
00181 
00182     Vec4(OldVec<float> const & ivec) : o(ivec) {}
00183     
00184     Vec4() {
00185       vec = _mm_setzero_ps();
00186     }
00187 
00188     explicit Vec4(float f1) {
00189       set1(f1);
00190     }
00191 
00192     Vec4(float f1, float f2, float f3, float f4=0) {
00193       arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00194     }
00195 
00196     void set(float f1, float f2, float f3, float f4=0) {
00197       vec = _mm_set_ps(f4, f3, f2, f1);
00198     }
00199     void set1(float f1) {
00200      vec =  _mm_set1_ps(f1);
00201     }
00202 
00203     Vec4 get1(unsigned int n) const { 
00204       return _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(n, n, n, n)); 
00205     }
00206 
00207     float & operator[](unsigned int n) {
00208       return arr[n];
00209     }
00210 
00211     float operator[](unsigned int n) const {
00212       return arr[n];
00213     }
00214     
00215     Vec2<float> xy() const { return  Vec2<float>(arr[0],arr[1]);}
00216     Vec2<float> zw() const { return  Vec2<float>(arr[2],arr[3]);}
00217 
00218   };
00219   
00220   template<>
00221   union Vec2<double> {
00222     typedef  __m128d nativeType;
00223     __m128d vec;
00224     double __attribute__ ((aligned(16))) arr[2];
00225         
00226     Vec2(__m128d ivec) : vec(ivec) {}
00227     
00228     Vec2() {
00229       vec = _mm_setzero_pd();
00230     }
00231 
00232     Vec2(double f1, double f2) {
00233       arr[0] = f1; arr[1] = f2;
00234     }
00235 
00236     explicit Vec2(double f1) {
00237       set1(f1);
00238     }
00239     
00240     void set(double f1, double f2) {
00241       arr[0] = f1; arr[1] = f2;
00242     }
00243 
00244     void set1(double f1) {
00245       vec = _mm_set1_pd(f1);
00246     }
00247 
00248     Vec2 get1(unsigned int n) const {
00249       return Vec2(arr[n],arr[n]);
00250     }
00251    
00252     double operator[](unsigned int n) const {
00253       return arr[n];
00254     }
00255   };
00256  
00257 
00258   template<>
00259   union Vec4<double> {
00260     __m128d vec[2];
00261     double __attribute__ ((aligned(16))) arr[4];
00262     OldVec<double> o;
00263     
00264     Vec4(__m128d ivec[]) {
00265       vec[0] = ivec[0];
00266       vec[1] = ivec[1];
00267     }
00268     
00269     Vec4(__m128d ivec0, __m128d ivec1) {
00270       vec[0] = ivec0;
00271       vec[1] = ivec1;
00272     }
00273     
00274     Vec4() {
00275       vec[0] = _mm_setzero_pd();
00276       vec[1] = _mm_setzero_pd();
00277     }
00278 
00279     explicit Vec4(double f1) {
00280       set1(f1);
00281     }
00282 
00283     Vec4(double f1, double f2, double f3, double f4=0) {
00284       arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00285     }
00286     
00287    Vec4( Vec2<double> ivec0,   Vec2<double> ivec1) {
00288       vec[0] = ivec0.vec;
00289       vec[1] = ivec1.vec;
00290     }
00291     
00292     Vec4( Vec2<double> ivec0,  double f3, double f4=0) {
00293       vec[0] = ivec0.vec;
00294       arr[2] = f3; arr[3] = f4;
00295     }
00296 
00297    Vec4( Vec2<double> ivec0) {
00298       vec[0] = ivec0.vec;
00299       vec[1] =  _mm_setzero_pd();
00300     }
00301 
00302 
00303     Vec4(OldVec<double> const & ivec) : o(ivec) {}
00304 
00305     void set(double f1, double f2, double f3, double f4=0) {
00306       arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00307     }
00308 
00309     void set1(double f1) {
00310       vec[0] = vec[1]= _mm_set1_pd(f1);
00311     }
00312 
00313 
00314     Vec4 get1(unsigned int n) const {
00315       return Vec4(arr[n],arr[n],arr[n],arr[n]);
00316     }
00317 
00318     double & operator[](unsigned int n) {
00319       return arr[n];
00320     }
00321 
00322     double operator[](unsigned int n) const {
00323       return arr[n];
00324     }
00325   
00326     Vec2<double> xy() const { return vec[0];}
00327     Vec2<double> zw() const { return vec[1];}
00328 
00329   };
00330 
00331 #endif // CMS_USE_SSE
00332   
00333   typedef Vec4<float> Vec4F;
00334   typedef Vec4<float> Vec3F;
00335   typedef Vec2<double> Vec2D;
00336   typedef Vec4<double> Vec3D;
00337   typedef Vec4<double> Vec4D;
00338 
00339   template<typename T>
00340   struct As3D {
00341     Vec4<T> const & v;
00342     As3D(Vec4<T> const &iv ) : v(iv){}
00343   };
00344 
00345   template<typename T>
00346   inline As3D<T> as3D(Vec4<T> const &v ) { return v;}
00347 
00348 }
00349 
00350 #ifdef CMS_USE_SSE
00351 
00352 
00353 //float op
00354 
00355 inline float dot(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00356   using  mathSSE::_mm_dot_ps;
00357   float s;
00358   _mm_store_ss(&s,_mm_dot_ps(a.vec,b.vec));
00359   return s;
00360 }
00361 
00362 inline mathSSE::Vec4F cross(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00363   using  mathSSE::_mm_cross_ps;
00364   return _mm_cross_ps(a.vec,b.vec);
00365 }
00366 
00367 
00368 inline bool operator==(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00369   return _mm_movemask_ps(_mm_cmpeq_ps(a.vec,b.vec))==0xf;
00370 }
00371 
00372 inline mathSSE::Vec4F cmpeq(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00373   return _mm_cmpeq_ps(a.vec,b.vec);
00374 }
00375 
00376 inline mathSSE::Vec4F cmpgt(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00377   return _mm_cmpgt_ps(a.vec,b.vec);
00378 }
00379 
#ifdef __SSE3__
/// Horizontal add of a and b via _mm_hadd_ps; only available when SSE3 is enabled.
inline mathSSE::Vec4F hadd(mathSSE::Vec4F a, mathSSE::Vec4F b) {
  return mathSSE::Vec4F(_mm_hadd_ps(a.vec, b.vec));
}
#endif
00385 
00386 
00387 inline mathSSE::Vec4F operator-(mathSSE::Vec4F a) {
00388   const __m128 neg = _mm_set_ps ( -0.0 , -0.0 , -0.0, -0.0);
00389   return _mm_xor_ps(a.vec,neg);
00390 }
00391 
00392 inline mathSSE::Vec4F operator&(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00393   return  _mm_and_ps(a.vec,b.vec);
00394 }
00395 inline mathSSE::Vec4F operator|(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00396   return  _mm_or_ps(a.vec,b.vec);
00397 }
00398 inline mathSSE::Vec4F operator^(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00399   return  _mm_xor_ps(a.vec,b.vec);
00400 }
00401 inline mathSSE::Vec4F andnot(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00402   return  _mm_andnot_ps(a.vec,b.vec);
00403 }
00404 
00405 
00406 inline mathSSE::Vec4F operator+(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00407   return  _mm_add_ps(a.vec,b.vec);
00408 }
00409 
00410 inline mathSSE::Vec4F operator-(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00411   return  _mm_sub_ps(a.vec,b.vec);
00412 }
00413 
00414 inline mathSSE::Vec4F operator*(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00415   return  _mm_mul_ps(a.vec,b.vec);
00416 }
00417 
00418 inline mathSSE::Vec4F operator/(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00419   return  _mm_div_ps(a.vec,b.vec);
00420 }
00421 
00422 inline mathSSE::Vec4F operator*(float a, mathSSE::Vec4F b) {
00423   return  _mm_mul_ps(_mm_set1_ps(a),b.vec);
00424 }
00425 
00426 inline mathSSE::Vec4F operator*(mathSSE::Vec4F b,float a) {
00427   return  _mm_mul_ps(_mm_set1_ps(a),b.vec);
00428 }
00429 
00430 
00431 // double op 2d
00432 inline mathSSE::Vec2D operator-(mathSSE::Vec2D a) {
00433   const __m128d neg = _mm_set_pd ( -0.0 , -0.0);
00434   return _mm_xor_pd(a.vec,neg);
00435 }
00436 
00437 
00438 inline mathSSE::Vec2D operator&(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00439   return  _mm_and_pd(a.vec,b.vec);
00440 }
00441 inline mathSSE::Vec2D operator|(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00442   return  _mm_or_pd(a.vec,b.vec);
00443 }
00444 inline mathSSE::Vec2D operator^(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00445   return  _mm_xor_pd(a.vec,b.vec);
00446 }
00447 inline mathSSE::Vec2D andnot(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00448   return  _mm_andnot_pd(a.vec,b.vec);
00449 }
00450 
00451 
00452 inline mathSSE::Vec2D operator+(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00453   return  _mm_add_pd(a.vec,b.vec);
00454 }
00455 
00456 inline mathSSE::Vec2D operator-(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00457   return  _mm_sub_pd(a.vec,b.vec);
00458 }
00459 
00460 inline mathSSE::Vec2D operator*(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00461   return  _mm_mul_pd(a.vec,b.vec);
00462 }
00463 
00464 inline mathSSE::Vec2D operator/(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00465   return  _mm_div_pd(a.vec,b.vec);
00466 }
00467 
00468 inline mathSSE::Vec2D operator*(double a, mathSSE::Vec2D b) {
00469   return  _mm_mul_pd(_mm_set1_pd(a),b.vec);
00470 }
00471 
00472 inline mathSSE::Vec2D operator*(mathSSE::Vec2D b,double a) {
00473   return  _mm_mul_pd(_mm_set1_pd(a),b.vec);
00474 }
00475 
00476 inline double dot(mathSSE::Vec2D a, mathSSE::Vec2D b)  __attribute__((always_inline)) __attribute__ ((pure));
00477 
00478 inline double dot(mathSSE::Vec2D a, mathSSE::Vec2D b){
00479   __m128d res = _mm_mul_pd ( a.vec, b.vec);
00480   res = _mm_add_sd (  _mm_shuffle_pd ( res , res, 1 ), res );
00481   double s;
00482   _mm_store_sd(&s,res);
00483   return s;
00484 }
00485 
00486 inline double cross(mathSSE::Vec2D a, mathSSE::Vec2D b)  __attribute__((always_inline)) __attribute__ ((pure));
00487 
00488 inline double cross(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00489   __m128d res =  _mm_shuffle_pd ( b.vec, b.vec, 1);
00490   res = _mm_mul_pd (  a.vec , res );
00491   res = _mm_sub_sd (res, _mm_shuffle_pd ( res , res, 1 ));
00492   double s;
00493   _mm_store_sd(&s,res);
00494   return s;
00495 }
00496 
00497 
00498 // double op 3d
00499 
00500 inline bool operator==(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00501   return 
00502     _mm_movemask_pd(_mm_cmpeq_pd(a.vec[0],b.vec[0]))==0x3 && 
00503     _mm_movemask_pd(_mm_cmpeq_pd(a.vec[1],b.vec[1]))==0x3 ;
00504 }
00505 
00506 inline mathSSE::Vec4D operator-(mathSSE::Vec4D a) {
00507   const __m128d neg = _mm_set_pd ( -0.0 , -0.0);
00508   return mathSSE::Vec4D(_mm_xor_pd(a.vec[0],neg),_mm_xor_pd(a.vec[1],neg));
00509 }
00510 
00511 
00512 inline mathSSE::Vec4D operator+(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00513   return  mathSSE::Vec4D(_mm_add_pd(a.vec[0],b.vec[0]),_mm_add_pd(a.vec[1],b.vec[1]));
00514 }
00515 inline mathSSE::Vec4D operator-(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00516   return  mathSSE::Vec4D(_mm_sub_pd(a.vec[0],b.vec[0]),_mm_sub_pd(a.vec[1],b.vec[1]));
00517 }
00518 inline mathSSE::Vec4D operator*(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00519   return  mathSSE::Vec4D(_mm_mul_pd(a.vec[0],b.vec[0]),_mm_mul_pd(a.vec[1],b.vec[1]));
00520 }
00521 inline mathSSE::Vec4D operator/(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00522   return  mathSSE::Vec4D(_mm_div_pd(a.vec[0],b.vec[0]),_mm_div_pd(a.vec[1],b.vec[1]));
00523 }
00524 
00525 inline mathSSE::Vec4D operator*(double a, mathSSE::Vec4D b) {
00526   __m128d res = _mm_set1_pd(a);
00527   return  mathSSE::Vec4D(_mm_mul_pd(res,b.vec[0]),_mm_mul_pd(res,b.vec[1]));
00528 }
00529 
00530 inline mathSSE::Vec4D operator*(mathSSE::Vec4D b, double a) {
00531   __m128d res = _mm_set1_pd(a);
00532   return  mathSSE::Vec4D(_mm_mul_pd(res,b.vec[0]),_mm_mul_pd(res,b.vec[1]));
00533 }
00534 
00535 
00536 
00537 inline double dot(mathSSE::Vec4D a, mathSSE::Vec4D b) __attribute__((always_inline)) __attribute__ ((pure));
00538 
00539 inline double dot(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00540   __m128d res = _mm_add_sd ( _mm_mul_pd ( a.vec[0], b.vec[0]),
00541                              _mm_mul_sd ( a.vec[1], b.vec[1]) 
00542                              );
00543   res = _mm_add_sd ( _mm_unpackhi_pd ( res , res ), res );
00544   double s;
00545   _mm_store_sd(&s,res);
00546   return s;
00547 }
00548 
00549 inline mathSSE::Vec4D cross(mathSSE::Vec4D a, mathSSE::Vec4D b) __attribute__((always_inline)) __attribute__ ((pure));
00550  
00551 inline mathSSE::Vec4D cross(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00552   const __m128d neg = _mm_set_pd ( 0.0 , -0.0 );
00553   // lh .z * rh .x , lh .z * rh .y
00554   __m128d l1 = _mm_mul_pd ( _mm_unpacklo_pd ( a.vec[1] , a.vec[1] ), b.vec[0] );
00555   // rh .z * lh .x , rh .z * lh .y
00556   __m128d l2 = _mm_mul_pd ( _mm_unpacklo_pd (  b.vec[1],  b.vec[1] ),  a.vec[0] );
00557   __m128d m1 = _mm_sub_pd ( l1 , l2 ); // l1 - l2
00558   m1 = _mm_shuffle_pd ( m1 , m1 , 1 ); // switch the elements
00559   m1 = _mm_xor_pd ( m1 , neg ); // change the sign of the first element
00560   // lh .x * rh .y , lh .y * rh .x
00561   l1 = _mm_mul_pd (  a.vec[0] , _mm_shuffle_pd (  b.vec[0] ,  b.vec[0] , 1 ) );
00562   // lh .x * rh .y - lh .y * rh .x
00563   __m128d m2 = _mm_sub_sd ( l1 , _mm_unpackhi_pd ( l1 , l1 ) );
00564 
00565   return  mathSSE::Vec4D( m1 , m2 );
00566 }
00567 
00568 
00569 
00570 // sqrt
00571 namespace mathSSE {
00572   template<> inline Vec4F sqrt(Vec4F v) { return _mm_sqrt_ps(v.vec);}
00573   template<> inline Vec2D sqrt(Vec2D v) { return _mm_sqrt_pd(v.vec);}
00574   template<> inline Vec4D sqrt(Vec4D v) { 
00575     return Vec4D(_mm_sqrt_pd(v.vec[0]),_mm_sqrt_pd(v.vec[1]));
00576   }
00577 }
00578 
00579 // cephes func
00580 #include "DataFormats/Math/interface/sse_mathfun.h"
00581 namespace mathSSE {
00582   inline Vec4F log(Vec4F v) { return log_ps(v.vec);}
00583   inline Vec4F exp(Vec4F v) { return exp_ps(v.vec);}
00584   inline Vec4F sin(Vec4F v) { return sin_ps(v.vec);}
00585   inline Vec4F cos(Vec4F v) { return cos_ps(v.vec);}
00586   inline void sincos(Vec4F v, Vec4F & s, Vec4F & c) { sincos_ps(v.vec,&s.vec, &c.vec);}
00587 
00588   inline float log(float f) { float s; _mm_store_ss(&s,log_ps(_mm_load_ss(&f))); return s;}
00589   inline float exp(float f) { float s; _mm_store_ss(&s,exp_ps(_mm_load_ss(&f))); return s;}
00590   inline float sin(float f) { float s; _mm_store_ss(&s,sin_ps(_mm_load_ss(&f))); return s;}
00591   inline float cos(float f) { float s; _mm_store_ss(&s,log_ps(_mm_load_ss(&f))); return s;}
00592   inline void sincos(float f, float & s, float & c) { 
00593     __m128 vs, vc; 
00594     sincos_ps(_mm_load_ss(&f),&vs, &vc);   
00595     _mm_store_ss(&s,vs);_mm_store_ss(&c,vc);   
00596   }
00597 }
00598 #endif // CMS_USE_SSE
00599 
00600 
00601 #include <iosfwd>
00602 std::ostream & operator<<(std::ostream & out, mathSSE::Vec2D const & v);
00603 std::ostream & operator<<(std::ostream & out, mathSSE::Vec4F const & v);
00604 std::ostream & operator<<(std::ostream & out, mathSSE::Vec4D const & v);
00605 
00606 std::ostream & operator<<(std::ostream & out, mathSSE::As3D<float> const & v);
00607 std::ostream & operator<<(std::ostream & out, mathSSE::As3D<double> const & v);
00608 
00609 
00610 #endif // DataFormat_Math_SSEVec_H