CMS 3D CMS Logo

KalmanUtilsMPlex.cc
Go to the documentation of this file.
1 #include "KalmanUtilsMPlex.h"
2 #include "PropagationMPlex.h"
3 
4 //#define DEBUG
5 #include "Debug.h"
6 
7 #include "KalmanUtilsMPlex.icc"
8 
10 
11 namespace {
12  using namespace mkfit;
13  using idx_t = Matriplex::idx_t;
14 
  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
    // outPar = psPar + kalmanGain*(dPar)
    // D = B + A * C, where A is the 6x3 Kalman gain whose right column-block is zero,
    // B the propagated parameters and C the 2-vector local residual.
    // Delegates to the generated implementation over the full Matriplex width [0, NN).

    // XXX Regenerate with a script.

    MultResidualsAdd_imp(A, B, C, D, 0, NN);
  }
24 
25  inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
26  // outPar = psPar + kalmanGain*(dPar)
27  // D = B A C
28  // where right half of kalman gain is 0
29 
30  // XXX Regenerate with a script.
31 
32  typedef float T;
33  const idx_t N = NN;
34 
35  const T* a = A.fArray;
36  ASSUME_ALIGNED(a, 64);
37  const T* b = B.fArray;
38  ASSUME_ALIGNED(b, 64);
39  const T* c = C.fArray;
40  ASSUME_ALIGNED(c, 64);
41  T* d = D.fArray;
42  ASSUME_ALIGNED(d, 64);
43 
44 #pragma omp simd
45  for (idx_t n = 0; n < N; ++n) {
46  // generate loop (can also write it manually this time, it's not much)
47  d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
48  d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
49  d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
50  d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
51  d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
52  d[5 * N + n] = b[5 * N + n] + a[10 * N + n] * c[0 * N + n] + a[11 * N + n] * c[1 * N + n];
53  }
54  }
55 
56  //------------------------------------------------------------------------------
57 
  inline void Chi2Similarity(const MPlex2V& A,  //resPar
                             const MPlex2S& C,  //resErr
                             MPlexQF& D)        //outChi2
  {
    // outChi2 = (resPar)^T * resErr * (resPar)
    // D = A^T * C * A, the chi2 similarity of a 2-vector residual with its
    // (already inverted) 2x2 symmetric covariance. One scalar per matriplex slot.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);
    T* d = D.fArray;
    ASSUME_ALIGNED(d, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      // Expanded quadratic form: c00*a0^2 + c11*a1^2 + 2*c01*a0*a1
      // (packed symmetric storage: c[0]=c00, c[1]=c01, c[2]=c11).
      d[0 * N + n] = c[0 * N + n] * a[0 * N + n] * a[0 * N + n] + c[2 * N + n] * a[1 * N + n] * a[1 * N + n] +
                     2 * (c[1 * N + n] * a[1 * N + n] * a[0 * N + n]);
    }
  }
82 
83  //------------------------------------------------------------------------------
84 
85  inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
86  // The rest of matrix is left untouched.
87 
88  typedef float T;
89  const idx_t N = NN;
90 
91  const T* a = A.fArray;
92  ASSUME_ALIGNED(a, 64);
93  const T* b = B.fArray;
94  ASSUME_ALIGNED(b, 64);
95  T* c = C.fArray;
96  ASSUME_ALIGNED(c, 64);
97 
98 #pragma omp simd
99  for (idx_t n = 0; n < N; ++n) {
100  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
101  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
102  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
103  c[3 * N + n] = a[3 * N + n] + b[3 * N + n];
104  c[4 * N + n] = a[4 * N + n] + b[4 * N + n];
105  c[5 * N + n] = a[5 * N + n] + b[5 * N + n];
106  }
107  }
108 
109  inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
110  // The rest of matrix is left untouched.
111 
112  typedef float T;
113  const idx_t N = NN;
114 
115  const T* a = A.fArray;
116  ASSUME_ALIGNED(a, 64);
117  const T* b = B.fArray;
118  ASSUME_ALIGNED(b, 64);
119  T* c = C.fArray;
120  ASSUME_ALIGNED(c, 64);
121 
122 #pragma omp simd
123  for (idx_t n = 0; n < N; ++n) {
124  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
125  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
126  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
127  }
128  }
129 
130  //------------------------------------------------------------------------------
131 
132  inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
133  // The rest of matrix is left untouched.
134 
135  typedef float T;
136  const idx_t N = NN;
137 
138  const T* a = A.fArray;
139  ASSUME_ALIGNED(a, 64);
140  const T* b = B.fArray;
141  ASSUME_ALIGNED(b, 64);
142  T* c = C.fArray;
143  ASSUME_ALIGNED(c, 64);
144 
145 #pragma omp simd
146  for (idx_t n = 0; n < N; ++n) {
147  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
148  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
149  c[2 * N + n] = a[2 * N + n] - b[2 * N + n];
150  }
151  }
152 
153  inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
154  // The rest of matrix is left untouched.
155 
156  typedef float T;
157  const idx_t N = NN;
158 
159  const T* a = A.fArray;
160  ASSUME_ALIGNED(a, 64);
161  const T* b = B.fArray;
162  ASSUME_ALIGNED(b, 64);
163  T* c = C.fArray;
164  ASSUME_ALIGNED(c, 64);
165 
166 #pragma omp simd
167  for (idx_t n = 0; n < N; ++n) {
168  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
169  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
170  }
171  }
172 
173  //==============================================================================
174 
  inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
    // C = A * B, C is 3x3, A is 3x3 , B is 3x3 sym
    // A is the tangent-plane rotation encoded by just two scalars per slot
    // (A00, A01); the full matrix pattern is documented in kalmanOperation.
    // B is in packed symmetric storage (b[0]=00, b[1]=01, b[2]=11, b[3]=02, ...).

    // Based on script generation and adapted to custom sizes.

    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      // Row 0: (a00, a01, 0) times B.
      c[0 * N + n] = a00[n] * b[0 * N + n] + a01[n] * b[1 * N + n];
      c[1 * N + n] = a00[n] * b[1 * N + n] + a01[n] * b[2 * N + n];
      c[2 * N + n] = a00[n] * b[3 * N + n] + a01[n] * b[4 * N + n];
      // Row 1: (0, 0, 1) times B -- copies B's third row.
      c[3 * N + n] = b[3 * N + n];
      c[4 * N + n] = b[4 * N + n];
      c[5 * N + n] = b[5 * N + n];
      // Row 2: (a01, -a00, 0) times B.
      c[6 * N + n] = a01[n] * b[0 * N + n] - a00[n] * b[1 * N + n];
      c[7 * N + n] = a01[n] * b[1 * N + n] - a00[n] * b[2 * N + n];
      c[8 * N + n] = a01[n] * b[3 * N + n] - a00[n] * b[4 * N + n];
    }
  }
205 
  inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
    // C = B * A^T, completing the similarity transform started in ProjectResErr;
    // only the upper-left 2x2 symmetric part of the result is needed, stored
    // packed in C (c[0]=00, c[1]=01, c[2]=11). A is the two-scalar rotation.

    // Based on script generation and adapted to custom sizes.

    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = b[0 * N + n] * a00[n] + b[1 * N + n] * a01[n];
      c[1 * N + n] = b[3 * N + n] * a00[n] + b[4 * N + n] * a01[n];
      c[2 * N + n] = b[5 * N + n];
    }
  }
230 
  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00,  //r00
                                            const MPlexQF& R01,  //r01
                                            const MPlexHV& A,    //res_glo
                                            MPlex2V& B)          //res_loc
  {
    // Rotate the 3D global residual A into the 2D local (tangent-plane) frame B
    // using the two-scalar rotation (R00, R01). Delegates to the generated
    // implementation over the full Matriplex width [0, NN).
    RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);
  }
238 
239  //==============================================================================
240 
  inline void ProjectResErr(const MPlex2H& A, const MPlexHS& B, MPlex2H& C) {
    // C = A * B, C is 2x3, A is 2x3 , B is 3x3 sym
    // First half of the similarity transform of the 3x3 symmetric covariance B
    // by the 2x3 projection A (see ProjectResErrTransp for the second half).

    /*
      A 0 1 2
        3 4 5
      B 0 1 3
        1 2 4
        3 4 5
    */

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n];
      c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n];
      c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n];
      c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
      c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n];
    }
  }
272 
  inline void ProjectResErrTransp(const MPlex2H& A, const MPlex2H& B, MPlex2S& C) {
    // C = B * A^T, C is 2x2 sym, A is 2x3 (A^T is 3x2), B is 2x3
    // Second half of the similarity transform: B is the intermediate
    // (projection * covariance) from ProjectResErr. Result is stored packed
    // symmetric (c[0]=00, c[1]=01, c[2]=11); only three elements are computed.

    /*
      B   0 1 2
          3 4 5
      A^T 0 3
          1 4
          2 5
    */

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = b[0 * N + n] * a[0 * N + n] + b[1 * N + n] * a[1 * N + n] + b[2 * N + n] * a[2 * N + n];
      c[1 * N + n] = b[0 * N + n] * a[3 * N + n] + b[1 * N + n] * a[4 * N + n] + b[2 * N + n] * a[5 * N + n];
      c[2 * N + n] = b[3 * N + n] * a[3 * N + n] + b[4 * N + n] * a[4 * N + n] + b[5 * N + n] * a[5 * N + n];
    }
  }
301 
302  inline void RotateResidualsOnPlane(const MPlex2H& R, //prj
303  const MPlexHV& A, //res_glo
304  MPlex2V& B) //res_loc
305  {
306  // typedef float T;
307  // const idx_t N = NN;
308 
309  // const T* a = A.fArray;
310  // ASSUME_ALIGNED(a, 64);
311  // T* b = B.fArray;
312  // ASSUME_ALIGNED(b, 64);
313  // const T* r = R.fArray;
314  // ASSUME_ALIGNED(r, 64);
315 
316 #pragma omp simd
317  for (int n = 0; n < NN; ++n) {
318  B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
319  B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
320  }
321  }
322 
  inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
    // HTG = rot * res_loc
    // C = A * B: intermediate term H^T * G for the Kalman gain, where A is the
    // two-scalar tangent-plane rotation and B the inverted 2x2 local residual
    // covariance (packed sym). The third column of C is always zero.

    // Based on script generation and adapted to custom sizes.

    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a00[n] * b[0 * N + n];
      c[1 * N + n] = a00[n] * b[1 * N + n];
      c[2 * N + n] = 0.;
      c[3 * N + n] = a01[n] * b[0 * N + n];
      c[4 * N + n] = a01[n] * b[1 * N + n];
      c[5 * N + n] = 0.;
      c[6 * N + n] = b[1 * N + n];
      c[7 * N + n] = b[2 * N + n];
      c[8 * N + n] = 0.;
    }
  }
354 
  inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
    // C = A * B, C is 6x3, A is 6x6 sym , B is 3x3
    // Kalman gain K = psErr * (H^T * G). B's third column is zero by
    // construction (see KalmanHTG), so every third element of C is set to 0
    // and only the first two columns involve actual multiplies.
    // A uses packed symmetric storage (flat indices per the 6x6 sym layout).

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[6 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[3 * N + n] * b[7 * N + n];
      c[2 * N + n] = 0;
      c[3 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[6 * N + n];
      c[4 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[4 * N + n] + a[4 * N + n] * b[7 * N + n];
      c[5 * N + n] = 0;
      c[6 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
      c[7 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
      c[8 * N + n] = 0;
      c[9 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
      c[10 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
      c[11 * N + n] = 0;
      c[12 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[6 * N + n];
      c[13 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[4 * N + n] + a[12 * N + n] * b[7 * N + n];
      c[14 * N + n] = 0;
      c[15 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[6 * N + n];
      c[16 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[4 * N + n] + a[17 * N + n] * b[7 * N + n];
      c[17 * N + n] = 0;
    }
  }
390 
  inline void KalmanHTG(const MPlex2H& A, const MPlex2S& B, MPlexH2& C) {
    // HTG = prj^T * res_loc
    // C = A^T * B: plane-geometry variant of the intermediate gain term, with
    // A the 2x3 projection and B the inverted 2x2 local covariance (packed sym).

    /*
      A^T 0 3
          1 4
          2 5
      B   0 1
          1 2
      C   0 1
          2 3
          4 5
    */

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[3 * N + n] * b[1 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[3 * N + n] * b[2 * N + n];
      c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n];
      c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n];
      c[4 * N + n] = a[2 * N + n] * b[0 * N + n] + a[5 * N + n] * b[1 * N + n];
      c[5 * N + n] = a[2 * N + n] * b[1 * N + n] + a[5 * N + n] * b[2 * N + n];
    }
  }
426 
  inline void KalmanGain(const MPlexLS& A, const MPlexH2& B, MPlexL2& C) {
    // C = A * B, C is 6x2, A is 6x6 sym , B is 3x2 (6x2 but half of it is zeros)
    // Plane-geometry Kalman gain: only the first three rows of the implicit
    // 6x2 B are stored (the rest is zero), so each output element needs just
    // three multiply-adds. Layouts below (flat indices):

    /*
      A 0  1  3  6 10 15
        1  2  4  7 11 16
        3  4  5  8 12 17
        6  7  8  9 13 18
       10 11 12 13 14 19
       15 16 17 18 19 20
      B 0 1
        2 3
        4 5
        X X with X=0, so not even included in B
        X X
        X X
      C 0 1
        2 3
        4 5
        6 7
        8 9
       10 11
    */

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[2 * N + n] + a[3 * N + n] * b[4 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[5 * N + n];
      c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[2 * N + n] + a[4 * N + n] * b[4 * N + n];
      c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[5 * N + n];
      c[4 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
      c[5 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[5 * N + n];
      c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n];
      c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[5 * N + n];
      c[8 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[2 * N + n] + a[12 * N + n] * b[4 * N + n];
      c[9 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[5 * N + n];
      c[10 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[2 * N + n] + a[17 * N + n] * b[4 * N + n];
      c[11 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[5 * N + n];
    }
  }
477 
  inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) {
    // C is transformed to align along y after rotation and rotated back.
    // Applies the (R00, R01) rotation to the x-y sub-blocks of the 6x6 sym
    // covariance Ci, zeroing the transverse component, and writes the result
    // into Co. NOTE: statement order below is load-bearing -- each group first
    // computes a rotated value into one co[] slot, then derives the sibling
    // slots from it before overwriting it, to avoid extra temporaries.

    typedef float T;
    const idx_t N = NN;

    const T* r00 = R00.fArray;
    ASSUME_ALIGNED(r00, 64);
    const T* r01 = R01.fArray;
    ASSUME_ALIGNED(r01, 64);
    const T* ci = Ci.fArray;
    ASSUME_ALIGNED(ci, 64);
    T* co = Co.fArray;
    ASSUME_ALIGNED(co, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      // a bit loopy to avoid temporaries
      // 2x2 x-y block: rotate, then redistribute via the rotation scalars.
      co[0 * N + n] =
          r00[n] * r00[n] * ci[0 * N + n] + 2 * r00[n] * r01[n] * ci[1 * N + n] + r01[n] * r01[n] * ci[2 * N + n];
      co[1 * N + n] = r00[n] * r01[n] * co[0 * N + n];
      co[2 * N + n] = r01[n] * r01[n] * co[0 * N + n];
      co[0 * N + n] = r00[n] * r00[n] * co[0 * N + n];

      // Cross terms of x-y with each remaining parameter (rows 2..5 of Ci).
      co[3 * N + n] = r00[n] * ci[3 * N + n] + r01[n] * ci[4 * N + n];
      co[4 * N + n] = r01[n] * co[3 * N + n];
      co[3 * N + n] = r00[n] * co[3 * N + n];

      co[6 * N + n] = r00[n] * ci[6 * N + n] + r01[n] * ci[7 * N + n];
      co[7 * N + n] = r01[n] * co[6 * N + n];
      co[6 * N + n] = r00[n] * co[6 * N + n];

      co[10 * N + n] = r00[n] * ci[10 * N + n] + r01[n] * ci[11 * N + n];
      co[11 * N + n] = r01[n] * co[10 * N + n];
      co[10 * N + n] = r00[n] * co[10 * N + n];

      co[15 * N + n] = r00[n] * ci[15 * N + n] + r01[n] * ci[16 * N + n];
      co[16 * N + n] = r01[n] * co[15 * N + n];
      co[15 * N + n] = r00[n] * co[15 * N + n];
    }
  }
519 
  void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
    // C = A * B, C is 6x2, A is 6x6 sym , B is 2x2
    // Body is machine-generated and shared via the included .ah file, which
    // expects the pointers a/b/c and the width N defined exactly as below.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "KalmanGain62.ah"
  }
535 
  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
    // C = A * B, C is 6x6, A is 6x3 , B is 3x6
    // K*H product for the tangent-plane (barrel) geometry; B is the rotation
    // encoded by the two scalars B00/B01. Delegates to the generated
    // implementation over the full Matriplex width [0, NN).
    KHMult_imp(A, B00, B01, C, 0, NN);
  }
540 
541  inline void KHMult(const MPlexL2& A, const MPlex2H& B, MPlexLL& C) {
542  // C = A * B, C is 6x6, A is 6x2 , B is 2x3 (2x6 but half of it made of zeros)
543 
544  /*
545  A 0 1
546  2 3
547  4 5
548  6 7
549  8 9
550  10 11
551  B 0 1 2 X X X with X=0 so not included in B
552  3 4 5 X X X
553  C 0 1 2 3 4 5
554  6 7 8 9 10 11
555  12 13 14 15 16 17
556  18 19 20 21 22 23
557  24 25 26 27 28 29
558  30 31 32 33 34 34
559  */
560 
561  // typedef float T;
562  // const idx_t N = NN;
563 
564  // const T* a = A.fArray;
565  // ASSUME_ALIGNED(a, 64);
566  // const T* b = B.fArray;
567  // ASSUME_ALIGNED(b, 64);
568  // T* c = C.fArray;
569  // ASSUME_ALIGNED(c, 64);
570 
571 #pragma omp simd
572  for (int n = 0; n < NN; ++n) {
573  C(n, 0, 0) = A(n, 0, 0) * B(n, 0, 0) + A(n, 0, 1) * B(n, 1, 0);
574  C(n, 0, 1) = A(n, 0, 0) * B(n, 0, 1) + A(n, 0, 1) * B(n, 1, 1);
575  C(n, 0, 2) = A(n, 0, 0) * B(n, 0, 2) + A(n, 0, 1) * B(n, 1, 2);
576  C(n, 0, 3) = 0;
577  C(n, 0, 4) = 0;
578  C(n, 0, 5) = 0;
579  C(n, 0, 6) = A(n, 1, 0) * B(n, 0, 0) + A(n, 1, 1) * B(n, 1, 0);
580  C(n, 0, 7) = A(n, 1, 0) * B(n, 0, 1) + A(n, 1, 1) * B(n, 1, 1);
581  C(n, 0, 8) = A(n, 1, 0) * B(n, 0, 2) + A(n, 1, 1) * B(n, 1, 2);
582  C(n, 0, 9) = 0;
583  C(n, 0, 10) = 0;
584  C(n, 0, 11) = 0;
585  C(n, 0, 12) = A(n, 2, 0) * B(n, 0, 0) + A(n, 2, 1) * B(n, 1, 0);
586  C(n, 0, 13) = A(n, 2, 0) * B(n, 0, 1) + A(n, 2, 1) * B(n, 1, 1);
587  C(n, 0, 14) = A(n, 2, 0) * B(n, 0, 2) + A(n, 2, 1) * B(n, 1, 2);
588  C(n, 0, 15) = 0;
589  C(n, 0, 16) = 0;
590  C(n, 0, 17) = 0;
591  C(n, 0, 18) = A(n, 3, 0) * B(n, 0, 0) + A(n, 3, 1) * B(n, 1, 0);
592  C(n, 0, 19) = A(n, 3, 0) * B(n, 0, 1) + A(n, 3, 1) * B(n, 1, 1);
593  C(n, 0, 20) = A(n, 3, 0) * B(n, 0, 2) + A(n, 3, 1) * B(n, 1, 2);
594  C(n, 0, 21) = 0;
595  C(n, 0, 22) = 0;
596  C(n, 0, 23) = 0;
597  C(n, 0, 24) = A(n, 4, 0) * B(n, 0, 0) + A(n, 4, 1) * B(n, 1, 0);
598  C(n, 0, 25) = A(n, 4, 0) * B(n, 0, 1) + A(n, 4, 1) * B(n, 1, 1);
599  C(n, 0, 26) = A(n, 4, 0) * B(n, 0, 2) + A(n, 4, 1) * B(n, 1, 2);
600  C(n, 0, 27) = 0;
601  C(n, 0, 28) = 0;
602  C(n, 0, 29) = 0;
603  C(n, 0, 30) = A(n, 5, 0) * B(n, 0, 0) + A(n, 5, 1) * B(n, 1, 0);
604  C(n, 0, 31) = A(n, 5, 0) * B(n, 0, 1) + A(n, 5, 1) * B(n, 1, 1);
605  C(n, 0, 32) = A(n, 5, 0) * B(n, 0, 2) + A(n, 5, 1) * B(n, 1, 2);
606  C(n, 0, 33) = 0;
607  C(n, 0, 34) = 0;
608  C(n, 0, 35) = 0;
609  }
610  }
611 
  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
    // C = A * B, C is 6x6, A is 6x6 , B is 6x6 sym
    // (K*H) * covariance product for the covariance update. Body is
    // machine-generated in the included .ah file, which expects a/b/c and N
    // defined exactly as below.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "KHC.ah"
  }
627 
  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
    // C = A * B, C is 6x6 sym, A is 6x2 , B is 6x6 sym
    // Plane-geometry variant of KHC with the compact 6x2 K*H factor.
    // Body is machine-generated in the included .ah file, which expects
    // a/b/c and N defined exactly as below.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "K62HC.ah"
  }
643 
644  //Warning: MultFull is not vectorized!
645  template <typename T1, typename T2, typename T3>
646  void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
647 #ifdef DEBUG
648  assert(nja == nib);
649  assert(nia == nic);
650  assert(njb == njc);
651 #endif
652  for (int n = 0; n < NN; ++n) {
653  for (int i = 0; i < nia; ++i) {
654  for (int j = 0; j < njb; ++j) {
655  C(n, i, j) = 0.;
656  for (int k = 0; k < nja; ++k)
657  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
658  }
659  }
660  }
661  }
662 
663  //Warning: MultTranspFull is not vectorized!
664  // (careful about which one is transposed, I think rows and cols are swapped and the one that is transposed is A)
665  template <typename T1, typename T2, typename T3>
666  void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
667 #ifdef DEBUG
668  assert(nja == njb);
669  assert(nia == nic);
670  assert(nib == njc);
671 #endif
672  for (int n = 0; n < NN; ++n) {
673  for (int i = 0; i < nia; ++i) {
674  for (int j = 0; j < nib; ++j) {
675  C(n, i, j) = 0.;
676  for (int k = 0; k < nja; ++k)
677  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, j, k);
678  }
679  }
680  }
681  }
682 
683 } // namespace
684 
685 //==============================================================================
686 // Kalman operations - common dummy variables
687 //==============================================================================
688 
namespace {
  // Dummy variables for parameter consistency to kalmanOperation.
  // Through KalmanFilterOperation enum parameter it is guaranteed that
  // those will never get accessed in the code (read from or written into).
  // NOTE(review): CMS_SA_ALLOW presumably silences static-analyzer complaints
  // about mutable file-scope globals -- confirm against the CMSSW macro docs.

  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;
  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;
  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;
}  // namespace
698 
699 namespace mkfit {
700 
701  //==============================================================================
702  // Kalman operations - Barrel
703  //==============================================================================
704 
  void kalmanUpdate(const MPlexLS& psErr,
                    const MPlexLV& psPar,
                    const MPlexHS& msErr,
                    const MPlexHV& msPar,
                    MPlexLS& outErr,
                    MPlexLV& outPar,
                    const int N_proc) {
    // Barrel Kalman update without propagation: updates track parameters and
    // covariance (with the local-covariance constraint) from the measurement;
    // chi2 is not requested, so the shared dummy_chi2 sink is passed.
    kalmanOperation(KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
  }
714 
716  const MPlexLV& psPar,
717  MPlexQI& Chg,
718  const MPlexHS& msErr,
719  const MPlexHV& msPar,
720  MPlexLS& outErr,
721  MPlexLV& outPar,
722  MPlexQI& outFailFlag,
723  const int N_proc,
724  const PropagationFlags& propFlags,
725  const bool propToHit) {
726  if (propToHit) {
727  MPlexLS propErr;
728  MPlexLV propPar;
729  MPlexQF msRad;
730 #pragma omp simd
731  for (int n = 0; n < NN; ++n) {
732  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
733  }
734 
735  propagateHelixToRMPlex(psErr, psPar, Chg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
736 
738  KFO_Update_Params | KFO_Local_Cov, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
739  } else {
741  KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
742  }
743  for (int n = 0; n < NN; ++n) {
744  if (n < N_proc && outPar.At(n, 3, 0) < 0) {
745  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
746  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
747  }
748  }
749  }
750 
751  //------------------------------------------------------------------------------
752 
  void kalmanComputeChi2(const MPlexLS& psErr,
                         const MPlexLV& psPar,
                         const MPlexQI& inChg,
                         const MPlexHS& msErr,
                         const MPlexHV& msPar,
                         MPlexQF& outChi2,
                         const int N_proc) {
    // Barrel chi2 computation without propagation or parameter update.
    // inChg is unused here (kept for signature parallelism with the
    // propagate-and-compute variant); dummy err/par sinks absorb the
    // outputs that KFO_Calculate_Chi2 never writes.
    kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
  }
762 
764  const MPlexLV& psPar,
765  const MPlexQI& inChg,
766  const MPlexHS& msErr,
767  const MPlexHV& msPar,
768  MPlexQF& outChi2,
769  MPlexLV& propPar,
770  MPlexQI& outFailFlag,
771  const int N_proc,
772  const PropagationFlags& propFlags,
773  const bool propToHit) {
774  propPar = psPar;
775  if (propToHit) {
776  MPlexLS propErr;
777  MPlexQF msRad;
778 #pragma omp simd
779  for (int n = 0; n < NN; ++n) {
780  if (n < N_proc) {
781  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
782  } else {
783  msRad.At(n, 0, 0) = 0.0f;
784  }
785  }
786 
787  propagateHelixToRMPlex(psErr, psPar, inChg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
788 
789  kalmanOperation(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
790  } else {
791  kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
792  }
793  }
794 
795  //------------------------------------------------------------------------------
796 
  // Core barrel Kalman routine. Depending on the kfOp bit-mask it computes the
  // hit-track chi2 (KFO_Calculate_Chi2) and/or updates parameters+covariance
  // (KFO_Update_Params, optionally constraining the covariance with
  // KFO_Local_Cov). Outputs not requested are received via the dummy_* sinks.
  void kalmanOperation(const int kfOp,
                       const MPlexLS& psErr,
                       const MPlexLV& psPar,
                       const MPlexHS& msErr,
                       const MPlexHV& msPar,
                       MPlexLS& outErr,
                       MPlexLV& outPar,
                       MPlexQF& outChi2,
                       const int N_proc) {
#ifdef DEBUG
    // Dump the slot-0 inputs for debugging.
    {
      dmutex_guard;
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // Rotate global point on tangent plane to cylinder
    // Tangent point is half way between hit and propagate position

    // Rotation matrix
    //  rotT00  0  rotT01
    //  rotT01  0 -rotT00
    //  0       1  0
    // Minimize temporaries: only two float are needed!

    MPlexQF rotT00;
    MPlexQF rotT01;
    for (int n = 0; n < NN; ++n) {
      if (n < N_proc) {
        // r is the transverse radius at the hit; the rotation scalars are built
        // from the average of hit and propagated positions (tangent point).
        const float r = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
        rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
        rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
      } else {
        // Inactive tail slots get a benign zero rotation.
        rotT00.At(n, 0, 0) = 0.0f;
        rotT01.At(n, 0, 0) = 0.0f;
      }
    }

    MPlexHV res_glo;  //position residual in global coordinates
    SubtractFirst3(msPar, psPar, res_glo);

    MPlexHS resErr_glo;  //covariance sum in global position coordinates
    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);

    MPlex2V res_loc;  //position residual in local coordinates
    RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
    MPlex2S resErr_loc;  //covariance sum in local position coordinates
    MPlexHH tempHH;
    ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
    ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);

#ifdef DEBUG
    // Dump the slot-0 intermediate residuals / covariances.
    {
      dmutex_guard;
      printf("res_glo:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", res_glo.At(0, i, 0));
      }
      printf("\n");
      printf("resErr_glo:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", resErr_glo.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_loc:\n");
      for (int i = 0; i < 2; ++i) {
        printf("%8f ", res_loc.At(0, i, 0));
      }
      printf("\n");
      printf("tempHH:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", tempHH.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("resErr_loc:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr_loc.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    //invert the 2x2 matrix (in place, via Cramer's rule)
    Matriplex::invertCramerSym(resErr_loc);

    if (kfOp & KFO_Calculate_Chi2) {
      Chi2Similarity(res_loc, resErr_loc, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      // Optionally constrain the x-y covariance block along the tangent plane.
      MPlexLS psErrLoc = psErr;
      if (kfOp & KFO_Local_Cov)
        CovXYconstrain(rotT00, rotT01, psErr, psErrLoc);

      MPlexLH K;                                      // kalman gain, fixme should be L2
      KalmanHTG(rotT00, rotT01, resErr_loc, tempHH);  // intermediate term to get kalman gain (H^T*G)
      KalmanGain(psErrLoc, tempHH, K);

      // outPar = psPar + K * res_loc
      MultResidualsAdd(K, psPar, res_loc, outPar);

      squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|

      // outErr = psErrLoc - (K*H) * psErrLoc (covariance shrinks after update).
      MPlexLL tempLL;
      KHMult(K, rotT00, rotT01, tempLL);
      KHC(tempLL, psErrLoc, outErr);
      outErr.subtract(psErrLoc, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        if (kfOp & KFO_Local_Cov) {
          printf("psErrLoc:\n");
          for (int i = 0; i < 6; ++i) {
            for (int j = 0; j < 6; ++j)
              printf("% 8e ", psErrLoc.At(0, i, j));
            printf("\n");
          }
          printf("\n");
        }
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempHH:\n");
        for (int i = 0; i < 3; ++i) {
          for (int j = 0; j < 3; ++j)
            printf("%8f ", tempHH.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 3; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempLL:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", tempLL.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
1005 
1006  //==============================================================================
1007  // Kalman operations - Plane
1008  //==============================================================================
1009 
1010  void kalmanUpdatePlane(const MPlexLS& psErr,
1011  const MPlexLV& psPar,
1012  const MPlexHS& msErr,
1013  const MPlexHV& msPar,
1014  const MPlexHV& plNrm,
1015  const MPlexHV& plDir,
1016  MPlexLS& outErr,
1017  MPlexLV& outPar,
1018  const int N_proc) {
1020  KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, plNrm, plDir, outErr, outPar, dummy_chi2, N_proc);
1021  }
1022 
1024  const MPlexLV& psPar,
1025  MPlexQI& Chg,
1026  const MPlexHS& msErr,
1027  const MPlexHV& msPar,
1028  const MPlexHV& plNrm,
1029  const MPlexHV& plDir,
1030  MPlexLS& outErr,
1031  MPlexLV& outPar,
1032  MPlexQI& outFailFlag,
1033  const int N_proc,
1034  const PropagationFlags& propFlags,
1035  const bool propToHit) {
1036  if (propToHit) {
1037  MPlexLS propErr;
1038  MPlexLV propPar;
1039  propagateHelixToPlaneMPlex(psErr, psPar, Chg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1040 
1042  propErr,
1043  propPar,
1044  msErr,
1045  msPar,
1046  plNrm,
1047  plDir,
1048  outErr,
1049  outPar,
1050  dummy_chi2,
1051  N_proc);
1052  } else {
1054  psErr,
1055  psPar,
1056  msErr,
1057  msPar,
1058  plNrm,
1059  plDir,
1060  outErr,
1061  outPar,
1062  dummy_chi2,
1063  N_proc);
1064  }
1065  for (int n = 0; n < NN; ++n) {
1066  if (outPar.At(n, 3, 0) < 0) {
1067  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1068  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1069  }
1070  }
1071  }
1072 
1073  //------------------------------------------------------------------------------
1074 
1075  void kalmanComputeChi2Plane(const MPlexLS& psErr,
1076  const MPlexLV& psPar,
1077  const MPlexQI& inChg,
1078  const MPlexHS& msErr,
1079  const MPlexHV& msPar,
1080  const MPlexHV& plNrm,
1081  const MPlexHV& plDir,
1082  MPlexQF& outChi2,
1083  const int N_proc) {
1085  KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1086  }
1087 
1089  const MPlexLV& psPar,
1090  const MPlexQI& inChg,
1091  const MPlexHS& msErr,
1092  const MPlexHV& msPar,
1093  const MPlexHV& plNrm,
1094  const MPlexHV& plDir,
1095  MPlexQF& outChi2,
1096  MPlexLV& propPar,
1097  MPlexQI& outFailFlag,
1098  const int N_proc,
1099  const PropagationFlags& propFlags,
1100  const bool propToHit) {
1101  propPar = psPar;
1102  if (propToHit) {
1103  MPlexLS propErr;
1104  propagateHelixToPlaneMPlex(psErr, psPar, inChg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1105 
1107  KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1108  } else {
1110  KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1111  }
1112  }
1113 
1114  //------------------------------------------------------------------------------
1115 
  // Core Kalman kernel for a measurement on a generic plane.
  // Bits in kfOp select the work done:
  //   KFO_Calculate_Chi2 -> fill outChi2 with the 2D residual chi2;
  //   KFO_Update_Params  -> fill outPar/outErr with the updated state.
  // Inputs: propagated state (psPar, psErr), hit (msPar, msErr), and the
  // plane's normal (plNrm) and strip-direction (plDir) vectors.
  // All NN Matriplex lanes are processed; N_proc is only used for the
  // final phi squashing.
  void kalmanOperationPlane(const int kfOp,
                            const MPlexLS& psErr,
                            const MPlexLV& psPar,
                            const MPlexHS& msErr,
                            const MPlexHV& msPar,
                            const MPlexHV& plNrm,
                            const MPlexHV& plDir,
                            MPlexLS& outErr,
                            MPlexLV& outPar,
                            MPlexQF& outChi2,
                            const int N_proc) {
#ifdef DEBUG
    {
      dmutex_guard;
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // Build the 2x3 projection from global coordinates onto the local
    // in-plane axes:
    //   row 0: D = plDir (strip direction)
    //   row 1: X = plNrm x plDir (cross product of normal and direction)
    // Only the two in-plane rows are kept; the component along the plane
    // normal is not part of the measurement.

    MPlex2H prj;
    for (int n = 0; n < NN; ++n) {
      prj(n, 0, 0) = plDir(n, 0, 0);
      prj(n, 0, 1) = plDir(n, 1, 0);
      prj(n, 0, 2) = plDir(n, 2, 0);
      prj(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
      prj(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
      prj(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
    }

    MPlexHV res_glo;  //position residual in global coordinates
    SubtractFirst3(msPar, psPar, res_glo);

    MPlexHS resErr_glo;  //covariance sum in global position coordinates
    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);

    MPlex2V res_loc;  //position residual in local coordinates
    RotateResidualsOnPlane(prj, res_glo, res_loc);
    MPlex2S resErr_loc;  //covariance sum in local position coordinates
    MPlex2H temp2H;
    ProjectResErr(prj, resErr_glo, temp2H);
    ProjectResErrTransp(prj, temp2H, resErr_loc);

#ifdef DEBUG
    {
      dmutex_guard;
      printf("prj:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", prj.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_glo:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", res_glo.At(0, i, 0));
      }
      printf("\n");
      printf("resErr_glo:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", resErr_glo.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_loc:\n");
      for (int i = 0; i < 2; ++i) {
        printf("%8f ", res_loc.At(0, i, 0));
      }
      printf("\n");
      printf("temp2H:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", temp2H.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("resErr_loc:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr_loc.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    //invert the 2x2 matrix (resErr_loc becomes its own inverse, in place)
    Matriplex::invertCramerSym(resErr_loc);

    if (kfOp & KFO_Calculate_Chi2) {
      // chi2 = res_loc^T * resErr_loc^-1 * res_loc
      Chi2Similarity(res_loc, resErr_loc, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      MPlexLS psErrLoc = psErr;

      MPlexH2 tempH2;
      MPlexL2 K;                           // kalman gain (6x2)
      KalmanHTG(prj, resErr_loc, tempH2);  // intermediate term to get kalman gain (H^T*G)
      KalmanGain(psErrLoc, tempH2, K);

      // outPar = psPar + K * res_loc
      MultResidualsAdd(K, psPar, res_loc, outPar);

      squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|

      // Covariance update: compute K*H (tempLL), then (K*H)*C, and
      // subtract it from the pre-update covariance.
      MPlexLL tempLL;
      KHMult(K, prj, tempLL);
      KHC(tempLL, psErrLoc, outErr);
      outErr.subtract(psErrLoc, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        if (kfOp & KFO_Local_Cov) {
          printf("psErrLoc:\n");
          for (int i = 0; i < 6; ++i) {
            for (int j = 0; j < 6; ++j)
              printf("% 8e ", psErrLoc.At(0, i, j));
            printf("\n");
          }
          printf("\n");
        }
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempH2:\n");
        for (int i = 0; i < 3; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", tempH2.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempLL:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", tempLL.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
1329 
1330  //==============================================================================
1331  // Kalman operations - Endcap
1332  //==============================================================================
1333 
1334  void kalmanUpdateEndcap(const MPlexLS& psErr,
1335  const MPlexLV& psPar,
1336  const MPlexHS& msErr,
1337  const MPlexHV& msPar,
1338  MPlexLS& outErr,
1339  MPlexLV& outPar,
1340  const int N_proc) {
1341  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1342  }
1343 
1345  const MPlexLV& psPar,
1346  MPlexQI& Chg,
1347  const MPlexHS& msErr,
1348  const MPlexHV& msPar,
1349  MPlexLS& outErr,
1350  MPlexLV& outPar,
1351  MPlexQI& outFailFlag,
1352  const int N_proc,
1353  const PropagationFlags& propFlags,
1354  const bool propToHit) {
1355  if (propToHit) {
1356  MPlexLS propErr;
1357  MPlexLV propPar;
1358  MPlexQF msZ;
1359 #pragma omp simd
1360  for (int n = 0; n < NN; ++n) {
1361  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
1362  }
1363 
1364  propagateHelixToZMPlex(psErr, psPar, Chg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
1365 
1366  kalmanOperationEndcap(KFO_Update_Params, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1367  } else {
1368  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1369  }
1370  for (int n = 0; n < NN; ++n) {
1371  if (n < N_proc && outPar.At(n, 3, 0) < 0) {
1372  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1373  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1374  }
1375  }
1376  }
1377 
1378  //------------------------------------------------------------------------------
1379 
1381  const MPlexLV& psPar,
1382  const MPlexQI& inChg,
1383  const MPlexHS& msErr,
1384  const MPlexHV& msPar,
1385  MPlexQF& outChi2,
1386  const int N_proc) {
1387  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1388  }
1389 
1391  const MPlexLV& psPar,
1392  const MPlexQI& inChg,
1393  const MPlexHS& msErr,
1394  const MPlexHV& msPar,
1395  MPlexQF& outChi2,
1396  MPlexLV& propPar,
1397  MPlexQI& outFailFlag,
1398  const int N_proc,
1399  const PropagationFlags& propFlags,
1400  const bool propToHit) {
1401  propPar = psPar;
1402  if (propToHit) {
1403  MPlexLS propErr;
1404  MPlexQF msZ;
1405 #pragma omp simd
1406  for (int n = 0; n < NN; ++n) {
1407  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
1408  }
1409 
1410  propagateHelixToZMPlex(psErr, psPar, inChg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
1411 
1412  kalmanOperationEndcap(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1413  } else {
1414  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1415  }
1416  }
1417 
1418  //------------------------------------------------------------------------------
1419 
1420  void kalmanOperationEndcap(const int kfOp,
1421  const MPlexLS& psErr,
1422  const MPlexLV& psPar,
1423  const MPlexHS& msErr,
1424  const MPlexHV& msPar,
1425  MPlexLS& outErr,
1426  MPlexLV& outPar,
1427  MPlexQF& outChi2,
1428  const int N_proc) {
1429 #ifdef DEBUG
1430  {
1431  dmutex_guard;
1432  printf("updateParametersEndcapMPlex\n");
1433  printf("psPar:\n");
1434  for (int i = 0; i < 6; ++i) {
1435  printf("%8f ", psPar.constAt(0, 0, i));
1436  printf("\n");
1437  }
1438  printf("\n");
1439  printf("msPar:\n");
1440  for (int i = 0; i < 3; ++i) {
1441  printf("%8f ", msPar.constAt(0, 0, i));
1442  printf("\n");
1443  }
1444  printf("\n");
1445  printf("psErr:\n");
1446  for (int i = 0; i < 6; ++i) {
1447  for (int j = 0; j < 6; ++j)
1448  printf("%8f ", psErr.constAt(0, i, j));
1449  printf("\n");
1450  }
1451  printf("\n");
1452  printf("msErr:\n");
1453  for (int i = 0; i < 3; ++i) {
1454  for (int j = 0; j < 3; ++j)
1455  printf("%8f ", msErr.constAt(0, i, j));
1456  printf("\n");
1457  }
1458  printf("\n");
1459  }
1460 #endif
1461 
1462  MPlex2V res;
1463  SubtractFirst2(msPar, psPar, res);
1464 
1465  MPlex2S resErr;
1466  AddIntoUpperLeft2x2(psErr, msErr, resErr);
1467 
1468 #ifdef DEBUG
1469  {
1470  dmutex_guard;
1471  printf("resErr:\n");
1472  for (int i = 0; i < 2; ++i) {
1473  for (int j = 0; j < 2; ++j)
1474  printf("%8f ", resErr.At(0, i, j));
1475  printf("\n");
1476  }
1477  printf("\n");
1478  }
1479 #endif
1480 
1481  //invert the 2x2 matrix
1483 
1484  if (kfOp & KFO_Calculate_Chi2) {
1485  Chi2Similarity(res, resErr, outChi2);
1486 
1487 #ifdef DEBUG
1488  {
1489  dmutex_guard;
1490  printf("resErr_loc (Inv):\n");
1491  for (int i = 0; i < 2; ++i) {
1492  for (int j = 0; j < 2; ++j)
1493  printf("%8f ", resErr.At(0, i, j));
1494  printf("\n");
1495  }
1496  printf("\n");
1497  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1498  }
1499 #endif
1500  }
1501 
1502  if (kfOp & KFO_Update_Params) {
1503  MPlexL2 K;
1504  KalmanGain(psErr, resErr, K);
1505 
1506  MultResidualsAdd(K, psPar, res, outPar);
1507 
1508  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
1509 
1510  KHC(K, psErr, outErr);
1511 
1512 #ifdef DEBUG
1513  {
1514  dmutex_guard;
1515  printf("outErr before subtract:\n");
1516  for (int i = 0; i < 6; ++i) {
1517  for (int j = 0; j < 6; ++j)
1518  printf("%8f ", outErr.At(0, i, j));
1519  printf("\n");
1520  }
1521  printf("\n");
1522  }
1523 #endif
1524 
1525  outErr.subtract(psErr, outErr);
1526 
1527 #ifdef DEBUG
1528  {
1529  dmutex_guard;
1530  printf("res:\n");
1531  for (int i = 0; i < 2; ++i) {
1532  printf("%8f ", res.At(0, i, 0));
1533  }
1534  printf("\n");
1535  printf("resErr (Inv):\n");
1536  for (int i = 0; i < 2; ++i) {
1537  for (int j = 0; j < 2; ++j)
1538  printf("%8f ", resErr.At(0, i, j));
1539  printf("\n");
1540  }
1541  printf("\n");
1542  printf("K:\n");
1543  for (int i = 0; i < 6; ++i) {
1544  for (int j = 0; j < 2; ++j)
1545  printf("%8f ", K.At(0, i, j));
1546  printf("\n");
1547  }
1548  printf("\n");
1549  printf("outPar:\n");
1550  for (int i = 0; i < 6; ++i) {
1551  printf("%8f ", outPar.At(0, i, 0));
1552  }
1553  printf("\n");
1554  printf("outErr:\n");
1555  for (int i = 0; i < 6; ++i) {
1556  for (int j = 0; j < 6; ++j)
1557  printf("%8f ", outErr.At(0, i, j));
1558  printf("\n");
1559  }
1560  printf("\n");
1561  }
1562 #endif
1563  }
1564  }
1565 
1566 } // end namespace mkfit
void propagateHelixToPlaneMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexHV &plPnt, const MPlexHV &plNrm, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Matriplex::Matriplex< float, HH, HH, NN > MPlexHH
Definition: Matrix.h:52
Matriplex::Matriplex< float, LL, LL, NN > MPlexLL
Definition: Matrix.h:48
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:436
#define CMS_SA_ALLOW
void propagateHelixToRMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msRad, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Definition: APVGainStruct.h:7
void kalmanOperationEndcap(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, HH, 1, NN > MPlexHV
Definition: Matrix.h:53
void kalmanUpdate(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanOperationPlane(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, LL, 2, NN > MPlexL2
Definition: Matrix.h:67
Matriplex::Matriplex< float, LL, 1, NN > MPlexLV
Definition: Matrix.h:49
void kalmanPropagateAndUpdate(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
assert(be >=bs)
__host__ __device__ VT * co
Definition: prefixScan.h:47
Definition: Electron.h:6
void squashPhiMPlex(MPlexLV &par, const int N_proc)
void kalmanOperation(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
void invertCramerSym(MPlexSym< T, D, N > &A, double *determ=nullptr)
Definition: MatriplexSym.h:420
void kalmanUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
constexpr Matriplex::idx_t NN
Definition: Matrix.h:43
void kalmanUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanPropagateAndUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
void kalmanPropagateAndUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
void kalmanPropagateAndComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< float, HH, 2, NN > MPlexH2
Definition: Matrix.h:68
d
Definition: ztail.py:151
void kalmanPropagateAndComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< int, 1, 1, NN > MPlexQI
Definition: Matrix.h:72
void kalmanPropagateAndComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< float, 2, HH, NN > MPlex2H
Definition: Matrix.h:69
void kalmanComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, 1, 1, NN > MPlexQF
Definition: Matrix.h:71
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
Matriplex::MatriplexSym< float, 2, NN > MPlex2S
Definition: Matrix.h:62
double b
Definition: hdecay.h:120
Matriplex::MatriplexSym< float, HH, NN > MPlexHS
Definition: Matrix.h:54
Matriplex::Matriplex< float, LL, HH, NN > MPlexLH
Definition: Matrix.h:64
Matriplex::Matriplex< float, 2, 1, NN > MPlex2V
Definition: Matrix.h:61
double a
Definition: hdecay.h:121
void propagateHelixToZMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msZ, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Matriplex::MatriplexSym< float, LL, NN > MPlexLS
Definition: Matrix.h:50
void kalmanComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
Definition: APVGainStruct.h:7
long double T
#define ASSUME_ALIGNED(a, b)