CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
VDTMath.cc
Go to the documentation of this file.
1 /*******************************************************************************
2  *
3  * VDT math library: collection of double precision vectorisable trascendental
4  * functions.
5  * The c++11 standard is used: remember to enable it for the compilation.
6  *
7  * The basic idea is to exploit pade polinomials.
8  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
9  * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos,
10  * tan, asin, acos and atan functions. The Cephes library can be found here:
11  * http://www.netlib.org/cephes/
12  *
13  ******************************************************************************/
14 
16 //#include "VDTMath.h"
17 #include <stdlib.h>
18 #include <iostream>
19 #include <cassert>
20 #include <iomanip>
21 
22 //------------------------------------------------------------------------------
23 
29  std::cout << "\nList of enabled instructions' sets:\n";
30 
31 #ifdef __SSE2__
32  std::cout << " - SSE2 instructions enabled" << std::endl;
33 #else
34  std::cout << " - SSE2 instructions *not* enabled" << std::endl;
35 #endif
36 #ifdef __SSE3__
37  std::cout << " - SSE3 instructions enabled" << std::endl;
38 #else
39  std::cout << " - SSE3 instructions *not* enabled" << std::endl;
40 #endif
41 
42 #ifdef __SSE4_1__
43  std::cout << " - SSE4.1 instructions enabled" << std::endl;
44 #else
45  std::cout << " - SSE4.1 instructions *not* enabled" << std::endl;
46 #endif
47 #ifdef __AVX__
48  std::cout << " - AVX instructions enabled" << std::endl;
49 #else
50  std::cout << " - AVX instructions *not* enabled" << std::endl;
51 #endif
52  std::cout << "\n\n";
53  }
54 
55 
56 /*******************************************************************************
57  *
58  * EXP IMPLEMENTATIONS
59  *
60  ******************************************************************************/
61 
63 void vdt::fast_exp_vect_46(double const * input,
64  double * output,
65  const unsigned int arr_size){
66 
67  // input & output must not point to the same memory location
68  // assert( input != output );
69 
70  int n;
71  int* nv = new int[arr_size];
72  double xx, px, qx;
73  ieee754 u;
74 
75  // for vectorisation
76  double x;
77 
78  for (unsigned int i = 0; i < arr_size; ++i)
79  nv[i] = n = std::floor( LOG2E * input[i] + 0.5 );
80 
81 
82  //Profitability threshold = 7
83  for (unsigned int i = 0; i < arr_size; ++i) {
84  x = input[i];
85 
86  //nv[i] = n = int(LOG2E * x + 0.5);//std::floor( LOG2E * x + 0.5 );
87  n=nv[i];
88 
89  px = n;
90 
91  x -= px * 6.93145751953125E-1;
92  x -= px * 1.42860682030941723212E-6;
93 
94  xx = x * x;
95 
96  // px = x * P(x**2).
97  px = PX1exp;
98  px *= xx;
99  px += PX2exp;
100  px *= xx;
101  px += PX3exp;
102  px *= x;
103 
104  // Evaluate Q(x**2).
105  qx = QX1exp;
106  qx *= xx;
107  qx += QX2exp;
108  qx *= xx;
109  qx += QX3exp;
110  qx *= xx;
111  qx += QX4exp;
112 
113  // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
114  x = px / (qx - px);
115  x = 1.0 + 2.0 * x;
116 
117  // partial
118  output[i]=x;
119 
120  } // end loop on input vals
121 
122  //Profitability threshold = 4
123  for (unsigned int i = 0; i < arr_size; ++i) {
124  // Build 2^n in double.
125  n=nv[i];
126  u.d = 0;
127  n += 1023;
128  u.ll = (long long) (n) << 52;
129  output[i] = output[i] * u.d;
130 
131  if (input[i] > EXP_LIMIT)
133  if (input[i] < -EXP_LIMIT)
134  output[i] = 0.;
135  }
136 
137  delete [] nv;
138 }
139 
140 //------------------------------------------------------------------------------
141 
142 /*******************************************************************************
143  *
144  * LOG IMPLEMENTATIONS
145  *
146  ******************************************************************************/
147 
148 //------------------------------------------------------------------------------
149 // The implementation is such in order to vectorise also with gcc461
150 void vdt::fast_log_vect_46(double const * original_input,
151  double* output,
152  const unsigned int arr_size){
153 
154  double* input = new double [arr_size];
155  double* x_arr = new double [arr_size];
156  int* fe_arr = new int [arr_size];
157 
158  double y, z;
159  double px,qx;
160  double x;
161  int fe;
162  // Profitability threshold = 4
163  for (unsigned int i = 0; i < arr_size; ++i) {
164  input[i] = original_input[i];
165  x= input[i];
166 
167 
168  /* separate mantissa from exponent
169  */
170 
171 // double input_x=x;
172 
173  /* separate mantissa from exponent */
174  double fe;
175  x = getMantExponent(x,fe);
176 
177  // blending
178  if( x < SQRTH ) {
179  fe-=1;
180  x += x ;
181  }
182  x -= 1.0;
183 
184  x_arr[i]=x;
185  fe_arr[i]=fe;
186 
187  }
188  // Profitability threshold = 7
189  for (unsigned int i = 0; i < arr_size; ++i) {
190 
191  x = x_arr[i];
192  fe = fe_arr[i];
193 
194  /* rational form */
195 
196  z = x*x;
197  px = PX1log;
198  px *= x;
199  px += PX2log;
200  px *= x;
201  px += PX3log;
202  px *= x;
203  px += PX4log;
204  px *= x;
205  px += PX5log;
206  px *= x;
207  px += PX6log;
208  //
209  //for the final formula
210  px *= x;
211  px *= z;
212 
213 
214  qx = x;
215  qx += QX1log;
216  qx *=x;
217  qx += QX2log;
218  qx *=x;
219  qx += QX3log;
220  qx *=x;
221  qx += QX4log;
222  qx *=x;
223  qx += QX5log;
224 
225 
226  y = px / qx ;
227 
228  y -= fe * 2.121944400546905827679e-4;
229  y -= 0.5 * z ;
230 
231  z = x + y;
232  z += fe * 0.693359375;
233  output[i]= z;
234  }
235 
236  for (unsigned int i = 0; i < arr_size; ++i) {
237  if (original_input[i] > LOG_UPPER_LIMIT)
239  if (original_input[i] < LOG_LOWER_LIMIT)
241  }
242 
243  delete [] input;
244  delete [] x_arr;
245  delete [] fe_arr;
246 
247 }
248 
249 
250 //------------------------------------------------------------------------------
251 
constexpr double QX4log
Definition: VDTMath.h:68
constexpr double QX3log
Definition: VDTMath.h:67
int i
Definition: DBlmapReader.cc:9
constexpr double PX1log
Definition: VDTMath.h:58
double d
Definition: VDTMath.h:170
long long ll
Definition: VDTMath.h:172
constexpr double PX2log
Definition: VDTMath.h:59
void fast_exp_vect_46(double const *input, double *output, const unsigned int arr_size)
Some tweaks to make it vectorise with gcc46.
Definition: VDTMath.cc:63
constexpr double EXP_LIMIT
Definition: VDTMath.h:46
constexpr double PX2exp
Definition: VDTMath.h:48
double double double z
void print_instructions_info()
Print the instructions used on screen.
Definition: VDTMath.cc:24
constexpr double LOG_UPPER_LIMIT
Definition: VDTMath.h:56
VDT_FORCE_INLINE double getMantExponent(double x, double &fe)
Like frexp but vectorising and the exponent is a double.
Definition: VDTMath.h:196
constexpr double QX1log
Definition: VDTMath.h:65
constexpr double SQRTH
Definition: VDTMath.h:36
constexpr double LOG_LOWER_LIMIT
Definition: VDTMath.h:57
constexpr double QX5log
Definition: VDTMath.h:69
const double infinity
constexpr double QX2log
Definition: VDTMath.h:66
constexpr double QX4exp
Definition: VDTMath.h:53
void fast_log_vect_46(double const *input, double *output, const unsigned int arr_size)
Some tweaks to make it vectorise with gcc46.
Definition: VDTMath.cc:150
constexpr double PX1exp
Definition: VDTMath.h:47
constexpr double QX2exp
Definition: VDTMath.h:51
constexpr double PX5log
Definition: VDTMath.h:62
constexpr double PX3log
Definition: VDTMath.h:60
constexpr double QX1exp
Definition: VDTMath.h:50
constexpr double PX4log
Definition: VDTMath.h:61
constexpr double PX3exp
Definition: VDTMath.h:49
tuple cout
Definition: gather_cfg.py:121
constexpr double PX6log
Definition: VDTMath.h:63
Definition: DDAxes.h:10
constexpr double LOG2E
Definition: VDTMath.h:35
constexpr double QX3exp
Definition: VDTMath.h:52
Used to switch between different type of interpretations of the data (64 bits)
Definition: VDTMath.h:169