00001 #ifndef DataFormat_Math_SSEVec_H
00002 #define DataFormat_Math_SSEVec_H
00003
00004 #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ > 4)
00005 #include <x86intrin.h>
00006 #define CMS_USE_SSE
00007
00008 #else
00009
00010 #ifdef __SSE2__
00011 #define CMS_USE_SSE
00012
00013 #include <mmintrin.h>
00014 #include <emmintrin.h>
00015 #endif
00016 #ifdef __SSE3__
00017 #include <pmmintrin.h>
00018 #endif
00019 #ifdef __SSE4_1__
00020 #include <smmintrin.h>
00021 #endif
00022
00023 #endif
00024
00025 #include<cmath>
00026
namespace mathSSE {
  // Generic square root: forwards to std::sqrt and converts the result back to T.
  // Explicit specializations of this template for the vector types appear later
  // in this header, so the signature must stay T sqrt(T).
  template<typename T>
  inline T sqrt(T t) {
    T const root = std::sqrt(t);
    return root;
  }
}
00030
namespace mathSSE {

  // samesign(rh, lh): true when both arguments have the same sign bit.
  // Only the explicit specializations below (int, long long, float, double)
  // are defined; the primary template is declared but has no body.
  template<typename T> inline bool samesign(T rh, T lh);

  // int: the XOR of the operands has its sign bit clear (i.e. is non-negative)
  // exactly when the two sign bits agree.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<int>(int rh, int lh) {
    int const differing = rh ^ lh;
    return differing >= 0;
  }

  // long long: same test on the 64-bit sign bit.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<long long>(long long rh, long long lh) {
    long long const differing = rh ^ lh;
    return differing >= 0;
  }

  // float: reinterpret the bit patterns through a union and compare as int,
  // so -0.0f and +0.0f count as having different signs.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<float>(float rh, float lh) {
    union { int i; float f; } first, second;
    first.f = rh;
    second.f = lh;
    return samesign<int>(first.i, second.i);
  }

  // double: reinterpret the bit patterns through a union and compare as long long.
  template<>
  inline bool
  __attribute__((always_inline)) __attribute__ ((pure)) samesign<double>(double rh, double lh) {
    union { long long i; double f; } first, second;
    first.f = rh;
    second.f = lh;
    return samesign<long long>(first.i, second.i);
  }
}
00065
00066
00067 namespace mathSSE {
00068 #ifdef CMS_USE_SSE
00069
// Four-lane dot product of v1 and v2. Every code path leaves the full
// sum v1[0]*v2[0] + ... + v1[3]*v2[3] in all four lanes of the result.
// The implementation is chosen at compile time from the available ISA.
inline __m128 _mm_dot_ps(__m128 v1, __m128 v2) {
#if defined(__SSE4_1__)
  // Single instruction: multiply all lanes, broadcast the sum to all lanes.
  return _mm_dp_ps(v1, v2, 0xff);
#elif defined(__SSE3__)
  // Two horizontal adds reduce the four products to one replicated sum.
  __m128 const products = _mm_mul_ps(v1, v2);
  __m128 const pairSums = _mm_hadd_ps(products, products);
  return _mm_hadd_ps(pairSums, pairSums);
#else
  // Plain SSE: add the vector to itself with halves swapped, then with
  // neighbouring lanes swapped.
  __m128 const products = _mm_mul_ps(v1, v2);
  __m128 const halvesSwapped = _mm_shuffle_ps(products, products, _MM_SHUFFLE(1, 0, 3, 2));
  __m128 const partial = _mm_add_ps(products, halvesSwapped);
  __m128 const pairsSwapped = _mm_shuffle_ps(partial, partial, _MM_SHUFFLE(2, 3, 0, 1));
  return _mm_add_ps(partial, pairsSwapped);
#endif
}
00086
00087
00088
// 3D cross product of the x,y,z lanes of v1 and v2 (lanes 0,1,2).
// Lane 3 of the result is v1[3]*v2[3] - v2[3]*v1[3].
inline __m128 _mm_cross_ps(__m128 v1, __m128 v2) {
  // (z2, z2, x1, w1) * (y1, x1, y2, w2)
  const __m128 firstA = _mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 0, 2, 2));
  const __m128 firstB = _mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 1, 0, 1));
  const __m128 minuend = _mm_mul_ps(firstA, firstB);

  // (z1, z1, x2, w2) * (y2, x2, y1, w1)
  const __m128 secondA = _mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 0, 2, 2));
  const __m128 secondB = _mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 1, 0, 1));
  const __m128 subtrahend = _mm_mul_ps(secondA, secondB);

  // The difference holds (x, -y, z, .): flip the sign bit of lane 1 only.
  const __m128 flipY = _mm_set_ps(0.0f, 0.0f, -0.0f, 0.0f);
  const __m128 difference = _mm_sub_ps(minuend, subtrahend);
  return _mm_xor_ps(difference, flipY);
}
00102
00103
00104 #endif // CMS_USE_SSE
00105
00106
// Plain four-component record with named fields, declared with 16-byte
// alignment. The Vec4 unions in this header overlay it on their storage.
template<typename T>
struct OldVec {
  T theX;
  T theY;
  T theZ;
  T theW;
} __attribute__ ((aligned (16)));
00109
00110
// Generic two-component vector backed by a 16-byte aligned array.
template<typename T> union Vec2 {
  // Both components set to zero.
  Vec2() {
    arr[0] = 0;
    arr[1] = 0;
  }

  // Component-wise construction.
  Vec2(T f1, T f2) {
    set(f1, f2);
  }

  // Same value in both components.
  explicit Vec2(T f1) {
    set(f1, f1);
  }

  // Overwrite both components.
  void set(T f1, T f2) {
    arr[0] = f1;
    arr[1] = f2;
  }

  // New vector with component n copied into both slots (n is not range-checked).
  Vec2 get1(unsigned int n) const {
    T const picked = arr[n];
    return Vec2(picked, picked);
  }

  // Mutable access to component n (not range-checked).
  T & operator[](unsigned int n) {
    return arr[n];
  }

  // Read-only access to component n, returned by value.
  T operator[](unsigned int n) const {
    return arr[n];
  }

  T __attribute__ ((aligned(16))) arr[2];
};
00139
00140
00141 template<typename T> union Vec4{
00142 Vec4() {
00143 arr[0] = 0; arr[1] = 0; arr[2] = 0; arr[3]=0;
00144 }
00145 Vec4(float f1, float f2, float f3, float f4=0) {
00146 arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00147 }
00148 explicit Vec4(float f1) {
00149 set1(f1);
00150 }
00151 void set(float f1, float f2, float f3, float f4=0) {
00152 arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00153 }
00154 void set1(float f1) {
00155 arr[0] = f1; arr[1] = f1; arr[2] = f1; arr[3]=f1;
00156 }
00157 Vec4 get1(unsigned int n) const {
00158 return Vec4(arr[n],arr[n],arr[n],arr[n]);
00159 }
00160
00161 Vec2<T> xy() const { return Vec2<T>(arr[0],arr[1]);}
00162 Vec2<T> zw() const { return Vec2<T>(arr[2],arr[3]);}
00163
00164
00165
00166 T __attribute__ ((aligned(16))) arr[4];
00167 OldVec<T> o;
00168 };
00169
00170
00171 #ifdef CMS_USE_SSE
00172
00173 template<>
00174 union Vec4<float> {
00175 typedef __m128 nativeType;
00176 __m128 vec;
00177 float __attribute__ ((aligned(16))) arr[4];
00178 OldVec<float> o;
00179
00180 Vec4(__m128 ivec) : vec(ivec) {}
00181
00182 Vec4(OldVec<float> const & ivec) : o(ivec) {}
00183
00184 Vec4() {
00185 vec = _mm_setzero_ps();
00186 }
00187
00188 explicit Vec4(float f1) {
00189 set1(f1);
00190 }
00191
00192 Vec4(float f1, float f2, float f3, float f4=0) {
00193 arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00194 }
00195
00196 void set(float f1, float f2, float f3, float f4=0) {
00197 vec = _mm_set_ps(f4, f3, f2, f1);
00198 }
00199 void set1(float f1) {
00200 vec = _mm_set1_ps(f1);
00201 }
00202
00203 Vec4 get1(unsigned int n) const {
00204 return _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(n, n, n, n));
00205 }
00206
00207 float & operator[](unsigned int n) {
00208 return arr[n];
00209 }
00210
00211 float operator[](unsigned int n) const {
00212 return arr[n];
00213 }
00214
00215 Vec2<float> xy() const { return Vec2<float>(arr[0],arr[1]);}
00216 Vec2<float> zw() const { return Vec2<float>(arr[2],arr[3]);}
00217
00218 };
00219
00220 template<>
00221 union Vec2<double> {
00222 typedef __m128d nativeType;
00223 __m128d vec;
00224 double __attribute__ ((aligned(16))) arr[2];
00225
00226 Vec2(__m128d ivec) : vec(ivec) {}
00227
00228 Vec2() {
00229 vec = _mm_setzero_pd();
00230 }
00231
00232 Vec2(double f1, double f2) {
00233 arr[0] = f1; arr[1] = f2;
00234 }
00235
00236 explicit Vec2(double f1) {
00237 set1(f1);
00238 }
00239
00240 void set(double f1, double f2) {
00241 arr[0] = f1; arr[1] = f2;
00242 }
00243
00244 void set1(double f1) {
00245 vec = _mm_set1_pd(f1);
00246 }
00247
00248 Vec2 get1(unsigned int n) const {
00249 return Vec2(arr[n],arr[n]);
00250 }
00251
00252 double operator[](unsigned int n) const {
00253 return arr[n];
00254 }
00255 };
00256
00257
00258 template<>
00259 union Vec4<double> {
00260 __m128d vec[2];
00261 double __attribute__ ((aligned(16))) arr[4];
00262 OldVec<double> o;
00263
00264 Vec4(__m128d ivec[]) {
00265 vec[0] = ivec[0];
00266 vec[1] = ivec[1];
00267 }
00268
00269 Vec4(__m128d ivec0, __m128d ivec1) {
00270 vec[0] = ivec0;
00271 vec[1] = ivec1;
00272 }
00273
00274 Vec4() {
00275 vec[0] = _mm_setzero_pd();
00276 vec[1] = _mm_setzero_pd();
00277 }
00278
00279 explicit Vec4(double f1) {
00280 set1(f1);
00281 }
00282
00283 Vec4(double f1, double f2, double f3, double f4=0) {
00284 arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00285 }
00286
00287 Vec4( Vec2<double> ivec0, Vec2<double> ivec1) {
00288 vec[0] = ivec0.vec;
00289 vec[1] = ivec1.vec;
00290 }
00291
00292 Vec4( Vec2<double> ivec0, double f3, double f4=0) {
00293 vec[0] = ivec0.vec;
00294 arr[2] = f3; arr[3] = f4;
00295 }
00296
00297 Vec4( Vec2<double> ivec0) {
00298 vec[0] = ivec0.vec;
00299 vec[1] = _mm_setzero_pd();
00300 }
00301
00302
00303 Vec4(OldVec<double> const & ivec) : o(ivec) {}
00304
00305 void set(double f1, double f2, double f3, double f4=0) {
00306 arr[0] = f1; arr[1] = f2; arr[2] = f3; arr[3]=f4;
00307 }
00308
00309 void set1(double f1) {
00310 vec[0] = vec[1]= _mm_set1_pd(f1);
00311 }
00312
00313
00314 Vec4 get1(unsigned int n) const {
00315 return Vec4(arr[n],arr[n],arr[n],arr[n]);
00316 }
00317
00318 double & operator[](unsigned int n) {
00319 return arr[n];
00320 }
00321
00322 double operator[](unsigned int n) const {
00323 return arr[n];
00324 }
00325
00326 Vec2<double> xy() const { return vec[0];}
00327 Vec2<double> zw() const { return vec[1];}
00328
00329 };
00330
00331 #endif // CMS_USE_SSE
00332
00333 typedef Vec4<float> Vec4F;
00334 typedef Vec4<float> Vec3F;
00335 typedef Vec2<double> Vec2D;
00336 typedef Vec4<double> Vec3D;
00337 typedef Vec4<double> Vec4D;
00338
00339 template<typename T>
00340 struct As3D {
00341 Vec4<T> const & v;
00342 As3D(Vec4<T> const &iv ) : v(iv){}
00343 };
00344
00345 template<typename T>
00346 inline As3D<T> as3D(Vec4<T> const &v ) { return v;}
00347
00348 }
00349
00350 #ifdef CMS_USE_SSE
00351
00352
00353
00354
00355 inline float dot(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00356 using mathSSE::_mm_dot_ps;
00357 float s;
00358 _mm_store_ss(&s,_mm_dot_ps(a.vec,b.vec));
00359 return s;
00360 }
00361
00362 inline mathSSE::Vec4F cross(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00363 using mathSSE::_mm_cross_ps;
00364 return _mm_cross_ps(a.vec,b.vec);
00365 }
00366
00367
00368 inline bool operator==(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00369 return _mm_movemask_ps(_mm_cmpeq_ps(a.vec,b.vec))==0xf;
00370 }
00371
00372 inline mathSSE::Vec4F cmpeq(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00373 return _mm_cmpeq_ps(a.vec,b.vec);
00374 }
00375
00376 inline mathSSE::Vec4F cmpgt(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00377 return _mm_cmpgt_ps(a.vec,b.vec);
00378 }
00379
#ifdef __SSE3__
// Horizontal add (SSE3 only): (a0+a1, a2+a3, b0+b1, b2+b3).
inline mathSSE::Vec4F hadd(mathSSE::Vec4F a, mathSSE::Vec4F b) {
  const __m128 sums = _mm_hadd_ps(a.vec, b.vec);
  return mathSSE::Vec4F(sums);
}
#endif
00385
00386
00387 inline mathSSE::Vec4F operator-(mathSSE::Vec4F a) {
00388 const __m128 neg = _mm_set_ps ( -0.0 , -0.0 , -0.0, -0.0);
00389 return _mm_xor_ps(a.vec,neg);
00390 }
00391
00392 inline mathSSE::Vec4F operator&(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00393 return _mm_and_ps(a.vec,b.vec);
00394 }
00395 inline mathSSE::Vec4F operator|(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00396 return _mm_or_ps(a.vec,b.vec);
00397 }
00398 inline mathSSE::Vec4F operator^(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00399 return _mm_xor_ps(a.vec,b.vec);
00400 }
00401 inline mathSSE::Vec4F andnot(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00402 return _mm_andnot_ps(a.vec,b.vec);
00403 }
00404
00405
00406 inline mathSSE::Vec4F operator+(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00407 return _mm_add_ps(a.vec,b.vec);
00408 }
00409
00410 inline mathSSE::Vec4F operator-(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00411 return _mm_sub_ps(a.vec,b.vec);
00412 }
00413
00414 inline mathSSE::Vec4F operator*(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00415 return _mm_mul_ps(a.vec,b.vec);
00416 }
00417
00418 inline mathSSE::Vec4F operator/(mathSSE::Vec4F a, mathSSE::Vec4F b) {
00419 return _mm_div_ps(a.vec,b.vec);
00420 }
00421
00422 inline mathSSE::Vec4F operator*(float a, mathSSE::Vec4F b) {
00423 return _mm_mul_ps(_mm_set1_ps(a),b.vec);
00424 }
00425
00426 inline mathSSE::Vec4F operator*(mathSSE::Vec4F b,float a) {
00427 return _mm_mul_ps(_mm_set1_ps(a),b.vec);
00428 }
00429
00430
00431
00432 inline mathSSE::Vec2D operator-(mathSSE::Vec2D a) {
00433 const __m128d neg = _mm_set_pd ( -0.0 , -0.0);
00434 return _mm_xor_pd(a.vec,neg);
00435 }
00436
00437
00438 inline mathSSE::Vec2D operator&(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00439 return _mm_and_pd(a.vec,b.vec);
00440 }
00441 inline mathSSE::Vec2D operator|(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00442 return _mm_or_pd(a.vec,b.vec);
00443 }
00444 inline mathSSE::Vec2D operator^(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00445 return _mm_xor_pd(a.vec,b.vec);
00446 }
00447 inline mathSSE::Vec2D andnot(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00448 return _mm_andnot_pd(a.vec,b.vec);
00449 }
00450
00451
00452 inline mathSSE::Vec2D operator+(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00453 return _mm_add_pd(a.vec,b.vec);
00454 }
00455
00456 inline mathSSE::Vec2D operator-(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00457 return _mm_sub_pd(a.vec,b.vec);
00458 }
00459
00460 inline mathSSE::Vec2D operator*(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00461 return _mm_mul_pd(a.vec,b.vec);
00462 }
00463
00464 inline mathSSE::Vec2D operator/(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00465 return _mm_div_pd(a.vec,b.vec);
00466 }
00467
00468 inline mathSSE::Vec2D operator*(double a, mathSSE::Vec2D b) {
00469 return _mm_mul_pd(_mm_set1_pd(a),b.vec);
00470 }
00471
00472 inline mathSSE::Vec2D operator*(mathSSE::Vec2D b,double a) {
00473 return _mm_mul_pd(_mm_set1_pd(a),b.vec);
00474 }
00475
00476 inline double dot(mathSSE::Vec2D a, mathSSE::Vec2D b) __attribute__((always_inline)) __attribute__ ((pure));
00477
00478 inline double dot(mathSSE::Vec2D a, mathSSE::Vec2D b){
00479 __m128d res = _mm_mul_pd ( a.vec, b.vec);
00480 res = _mm_add_sd ( _mm_shuffle_pd ( res , res, 1 ), res );
00481 double s;
00482 _mm_store_sd(&s,res);
00483 return s;
00484 }
00485
00486 inline double cross(mathSSE::Vec2D a, mathSSE::Vec2D b) __attribute__((always_inline)) __attribute__ ((pure));
00487
00488 inline double cross(mathSSE::Vec2D a, mathSSE::Vec2D b) {
00489 __m128d res = _mm_shuffle_pd ( b.vec, b.vec, 1);
00490 res = _mm_mul_pd ( a.vec , res );
00491 res = _mm_sub_sd (res, _mm_shuffle_pd ( res , res, 1 ));
00492 double s;
00493 _mm_store_sd(&s,res);
00494 return s;
00495 }
00496
00497
00498
00499
00500 inline bool operator==(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00501 return
00502 _mm_movemask_pd(_mm_cmpeq_pd(a.vec[0],b.vec[0]))==0x3 &&
00503 _mm_movemask_pd(_mm_cmpeq_pd(a.vec[1],b.vec[1]))==0x3 ;
00504 }
00505
00506 inline mathSSE::Vec4D operator-(mathSSE::Vec4D a) {
00507 const __m128d neg = _mm_set_pd ( -0.0 , -0.0);
00508 return mathSSE::Vec4D(_mm_xor_pd(a.vec[0],neg),_mm_xor_pd(a.vec[1],neg));
00509 }
00510
00511
00512 inline mathSSE::Vec4D operator+(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00513 return mathSSE::Vec4D(_mm_add_pd(a.vec[0],b.vec[0]),_mm_add_pd(a.vec[1],b.vec[1]));
00514 }
00515 inline mathSSE::Vec4D operator-(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00516 return mathSSE::Vec4D(_mm_sub_pd(a.vec[0],b.vec[0]),_mm_sub_pd(a.vec[1],b.vec[1]));
00517 }
00518 inline mathSSE::Vec4D operator*(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00519 return mathSSE::Vec4D(_mm_mul_pd(a.vec[0],b.vec[0]),_mm_mul_pd(a.vec[1],b.vec[1]));
00520 }
00521 inline mathSSE::Vec4D operator/(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00522 return mathSSE::Vec4D(_mm_div_pd(a.vec[0],b.vec[0]),_mm_div_pd(a.vec[1],b.vec[1]));
00523 }
00524
00525 inline mathSSE::Vec4D operator*(double a, mathSSE::Vec4D b) {
00526 __m128d res = _mm_set1_pd(a);
00527 return mathSSE::Vec4D(_mm_mul_pd(res,b.vec[0]),_mm_mul_pd(res,b.vec[1]));
00528 }
00529
00530 inline mathSSE::Vec4D operator*(mathSSE::Vec4D b, double a) {
00531 __m128d res = _mm_set1_pd(a);
00532 return mathSSE::Vec4D(_mm_mul_pd(res,b.vec[0]),_mm_mul_pd(res,b.vec[1]));
00533 }
00534
00535
00536
00537 inline double dot(mathSSE::Vec4D a, mathSSE::Vec4D b) __attribute__((always_inline)) __attribute__ ((pure));
00538
00539 inline double dot(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00540 __m128d res = _mm_add_sd ( _mm_mul_pd ( a.vec[0], b.vec[0]),
00541 _mm_mul_sd ( a.vec[1], b.vec[1])
00542 );
00543 res = _mm_add_sd ( _mm_unpackhi_pd ( res , res ), res );
00544 double s;
00545 _mm_store_sd(&s,res);
00546 return s;
00547 }
00548
00549 inline mathSSE::Vec4D cross(mathSSE::Vec4D a, mathSSE::Vec4D b) __attribute__((always_inline)) __attribute__ ((pure));
00550
00551 inline mathSSE::Vec4D cross(mathSSE::Vec4D a, mathSSE::Vec4D b) {
00552 const __m128d neg = _mm_set_pd ( 0.0 , -0.0 );
00553
00554 __m128d l1 = _mm_mul_pd ( _mm_unpacklo_pd ( a.vec[1] , a.vec[1] ), b.vec[0] );
00555
00556 __m128d l2 = _mm_mul_pd ( _mm_unpacklo_pd ( b.vec[1], b.vec[1] ), a.vec[0] );
00557 __m128d m1 = _mm_sub_pd ( l1 , l2 );
00558 m1 = _mm_shuffle_pd ( m1 , m1 , 1 );
00559 m1 = _mm_xor_pd ( m1 , neg );
00560
00561 l1 = _mm_mul_pd ( a.vec[0] , _mm_shuffle_pd ( b.vec[0] , b.vec[0] , 1 ) );
00562
00563 __m128d m2 = _mm_sub_sd ( l1 , _mm_unpackhi_pd ( l1 , l1 ) );
00564
00565 return mathSSE::Vec4D( m1 , m2 );
00566 }
00567
00568
00569
00570
00571 namespace mathSSE {
00572 template<> inline Vec4F sqrt(Vec4F v) { return _mm_sqrt_ps(v.vec);}
00573 template<> inline Vec2D sqrt(Vec2D v) { return _mm_sqrt_pd(v.vec);}
00574 template<> inline Vec4D sqrt(Vec4D v) {
00575 return Vec4D(_mm_sqrt_pd(v.vec[0]),_mm_sqrt_pd(v.vec[1]));
00576 }
00577 }
00578
00579
00580 #include "DataFormats/Math/interface/sse_mathfun.h"
00581 namespace mathSSE {
00582 inline Vec4F log(Vec4F v) { return log_ps(v.vec);}
00583 inline Vec4F exp(Vec4F v) { return exp_ps(v.vec);}
00584 inline Vec4F sin(Vec4F v) { return sin_ps(v.vec);}
00585 inline Vec4F cos(Vec4F v) { return cos_ps(v.vec);}
00586 inline void sincos(Vec4F v, Vec4F & s, Vec4F & c) { sincos_ps(v.vec,&s.vec, &c.vec);}
00587
00588 inline float log(float f) { float s; _mm_store_ss(&s,log_ps(_mm_load_ss(&f))); return s;}
00589 inline float exp(float f) { float s; _mm_store_ss(&s,exp_ps(_mm_load_ss(&f))); return s;}
00590 inline float sin(float f) { float s; _mm_store_ss(&s,sin_ps(_mm_load_ss(&f))); return s;}
00591 inline float cos(float f) { float s; _mm_store_ss(&s,log_ps(_mm_load_ss(&f))); return s;}
00592 inline void sincos(float f, float & s, float & c) {
00593 __m128 vs, vc;
00594 sincos_ps(_mm_load_ss(&f),&vs, &vc);
00595 _mm_store_ss(&s,vs);_mm_store_ss(&c,vc);
00596 }
00597 }
00598 #endif // CMS_USE_SSE
00599
00600
00601 #include <iosfwd>
00602 std::ostream & operator<<(std::ostream & out, mathSSE::Vec2D const & v);
00603 std::ostream & operator<<(std::ostream & out, mathSSE::Vec4F const & v);
00604 std::ostream & operator<<(std::ostream & out, mathSSE::Vec4D const & v);
00605
00606 std::ostream & operator<<(std::ostream & out, mathSSE::As3D<float> const & v);
00607 std::ostream & operator<<(std::ostream & out, mathSSE::As3D<double> const & v);
00608
00609
00610 #endif // DataFormat_Math_SSEVec_H