CMS 3D CMS Logo

KalmanUtilsMPlex.cc
Go to the documentation of this file.
1 #include "KalmanUtilsMPlex.h"
2 #include "PropagationMPlex.h"
3 
4 //#define DEBUG
5 #include "Debug.h"
6 
7 #include "KalmanUtilsMPlex.icc"
8 
10 
11 namespace {
12  using namespace mkfit;
13  using idx_t = Matriplex::idx_t;
14 
// Parameter update with full-size (6x3) Kalman gain:
//   D (outPar, 6-vec) = B (psPar) + A (gain) * C (2-vec local residual).
// Only the first two columns of the gain contribute (rest multiplies zeros);
// the unrolled arithmetic lives in MultResidualsAdd_imp (KalmanUtilsMPlex.icc)
// and is applied to all NN Matriplex slots.
 15  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
 16  // outPar = psPar + kalmanGain*(dPar)
 17  // D = B A C
 18  // where right half of kalman gain is 0
 19 
 20  // XXX Regenerate with a script.
 21 
 22  MultResidualsAdd_imp(A, B, C, D, 0, NN);
 23  }
24 
// Parameter update with compact 6x2 Kalman gain (zero columns dropped):
//   D (outPar) = B (psPar) + A (6x2 gain, row-major 2 floats per row) * C (2-vec residual).
// Fully unrolled over the 6 output components, vectorized across the NN slots.
 25  inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
 26  // outPar = psPar + kalmanGain*(dPar)
 27  // D = B A C
 28  // where right half of kalman gain is 0
 29 
 30  // XXX Regenerate with a script.
 31 
 32  typedef float T;
 33  const idx_t N = NN;
 34 
 35  const T* a = A.fArray;
 36  ASSUME_ALIGNED(a, 64);
 37  const T* b = B.fArray;
 38  ASSUME_ALIGNED(b, 64);
 39  const T* c = C.fArray;
 40  ASSUME_ALIGNED(c, 64);
 41  T* d = D.fArray;
 42  ASSUME_ALIGNED(d, 64);
 43 
 44 #pragma omp simd
 45  for (idx_t n = 0; n < N; ++n) {
 46  // generate loop (can also write it manually this time, it's not much)
 47  d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
 48  d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
 49  d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
 50  d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
 51  d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
 52  d[5 * N + n] = b[5 * N + n] + a[10 * N + n] * c[0 * N + n] + a[11 * N + n] * c[1 * N + n];
 53  }
 54  }
55 
56  //------------------------------------------------------------------------------
57 
// chi2 = r^T * S * r for a 2-component residual r (A) and a symmetric 2x2
// matrix S (C; packed as [0]=xx, [1]=xy, [2]=yy). Note: the caller passes the
// already-INVERTED residual-error matrix here (see kalmanOperation), so the
// result is the usual Kalman chi2 increment.
 58  inline void Chi2Similarity(const MPlex2V& A, //resPar
 59  const MPlex2S& C, //resErr
 60  MPlexQF& D) //outChi2
 61  {
 62  // outChi2 = (resPar) * resErr * (resPar)
 63  // D = A * C * A
 64 
 65  typedef float T;
 66  const idx_t N = NN;
 67 
 68  const T* a = A.fArray;
 69  ASSUME_ALIGNED(a, 64);
 70  const T* c = C.fArray;
 71  ASSUME_ALIGNED(c, 64);
 72  T* d = D.fArray;
 73  ASSUME_ALIGNED(d, 64);
 74 
 75 #pragma omp simd
 76  for (idx_t n = 0; n < N; ++n) {
 77  // generate loop (can also write it manually this time, it's not much)
// xx*r0^2 + yy*r1^2 + 2*xy*r0*r1 — the off-diagonal term counted twice by symmetry.
 78  d[0 * N + n] = c[0 * N + n] * a[0 * N + n] * a[0 * N + n] + c[2 * N + n] * a[1 * N + n] * a[1 * N + n] +
 79  2 * (c[1 * N + n] * a[1 * N + n] * a[0 * N + n]);
 80  }
 81  }
82 
83  //------------------------------------------------------------------------------
84 
85  inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
86  // The rest of matrix is left untouched.
87 
88  typedef float T;
89  const idx_t N = NN;
90 
91  const T* a = A.fArray;
92  ASSUME_ALIGNED(a, 64);
93  const T* b = B.fArray;
94  ASSUME_ALIGNED(b, 64);
95  T* c = C.fArray;
96  ASSUME_ALIGNED(c, 64);
97 
98 #pragma omp simd
99  for (idx_t n = 0; n < N; ++n) {
100  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
101  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
102  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
103  c[3 * N + n] = a[3 * N + n] + b[3 * N + n];
104  c[4 * N + n] = a[4 * N + n] + b[4 * N + n];
105  c[5 * N + n] = a[5 * N + n] + b[5 * N + n];
106  }
107  }
108 
109  inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
110  // The rest of matrix is left untouched.
111 
112  typedef float T;
113  const idx_t N = NN;
114 
115  const T* a = A.fArray;
116  ASSUME_ALIGNED(a, 64);
117  const T* b = B.fArray;
118  ASSUME_ALIGNED(b, 64);
119  T* c = C.fArray;
120  ASSUME_ALIGNED(c, 64);
121 
122 #pragma omp simd
123  for (idx_t n = 0; n < N; ++n) {
124  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
125  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
126  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
127  }
128  }
129 
130  //------------------------------------------------------------------------------
131 
132  inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
133  // The rest of matrix is left untouched.
134 
135  typedef float T;
136  const idx_t N = NN;
137 
138  const T* a = A.fArray;
139  ASSUME_ALIGNED(a, 64);
140  const T* b = B.fArray;
141  ASSUME_ALIGNED(b, 64);
142  T* c = C.fArray;
143  ASSUME_ALIGNED(c, 64);
144 
145 #pragma omp simd
146  for (idx_t n = 0; n < N; ++n) {
147  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
148  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
149  c[2 * N + n] = a[2 * N + n] - b[2 * N + n];
150  }
151  }
152 
153  inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
154  // The rest of matrix is left untouched.
155 
156  typedef float T;
157  const idx_t N = NN;
158 
159  const T* a = A.fArray;
160  ASSUME_ALIGNED(a, 64);
161  const T* b = B.fArray;
162  ASSUME_ALIGNED(b, 64);
163  T* c = C.fArray;
164  ASSUME_ALIGNED(c, 64);
165 
166 #pragma omp simd
167  for (idx_t n = 0; n < N; ++n) {
168  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
169  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
170  }
171  }
172 
173  //==============================================================================
174 
// First half of projecting the 3x3 global covariance onto the cylinder
// tangent plane: C = R * B where R is the rotation encoded only by its
// (0,0)=a00 and (0,1)=a01 entries (see the rotation-matrix sketch in
// kalmanOperation) and B is the symmetric 3x3 covariance sum.
// Row 1 of C is a plain copy (the rotation's middle row is (0,0,1) acting on
// the z components); row 2 uses (a01, -a00).
 175  inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
 176  // C = A * B, C is 3x3, A is 3x3 , B is 3x3 sym
 177 
 178  // Based on script generation and adapted to custom sizes.
 179 
 180  typedef float T;
 181  const idx_t N = NN;
 182 
 183  const T* a00 = A00.fArray;
 184  ASSUME_ALIGNED(a00, 64);
 185  const T* a01 = A01.fArray;
 186  ASSUME_ALIGNED(a01, 64);
 187  const T* b = B.fArray;
 188  ASSUME_ALIGNED(b, 64);
 189  T* c = C.fArray;
 190  ASSUME_ALIGNED(c, 64);
 191 
 192 #pragma omp simd
 193  for (int n = 0; n < N; ++n) {
 194  c[0 * N + n] = a00[n] * b[0 * N + n] + a01[n] * b[1 * N + n];
 195  c[1 * N + n] = a00[n] * b[1 * N + n] + a01[n] * b[2 * N + n];
 196  c[2 * N + n] = a00[n] * b[3 * N + n] + a01[n] * b[4 * N + n];
 197  c[3 * N + n] = b[3 * N + n];
 198  c[4 * N + n] = b[4 * N + n];
 199  c[5 * N + n] = b[5 * N + n];
 200  c[6 * N + n] = a01[n] * b[0 * N + n] - a00[n] * b[1 * N + n];
 201  c[7 * N + n] = a01[n] * b[1 * N + n] - a00[n] * b[2 * N + n];
 202  c[8 * N + n] = a01[n] * b[3 * N + n] - a00[n] * b[4 * N + n];
 203  }
 204  }
205 
// Second half of the tangent-plane projection: C = B * R^T reduced to the
// 2x2 symmetric local covariance (only the in-plane block is kept; the
// dropped row/column corresponds to the projected-out direction).
// B is the intermediate 3x3 produced by ProjectResErr above.
 206  inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
 207  // C = A * B, C is 3x3 sym, A is 3x3 , B is 3x3
 208 
 209  // Based on script generation and adapted to custom sizes.
 210 
 211  typedef float T;
 212  const idx_t N = NN;
 213 
 214  const T* a00 = A00.fArray;
 215  ASSUME_ALIGNED(a00, 64);
 216  const T* a01 = A01.fArray;
 217  ASSUME_ALIGNED(a01, 64);
 218  const T* b = B.fArray;
 219  ASSUME_ALIGNED(b, 64);
 220  T* c = C.fArray;
 221  ASSUME_ALIGNED(c, 64);
 222 
 223 #pragma omp simd
 224  for (int n = 0; n < N; ++n) {
 225  c[0 * N + n] = b[0 * N + n] * a00[n] + b[1 * N + n] * a01[n];
 226  c[1 * N + n] = b[3 * N + n] * a00[n] + b[4 * N + n] * a01[n];
 227  c[2 * N + n] = b[5 * N + n];
 228  }
 229  }
230 
// Rotate the global 3-residual into the 2 local (tangent-plane) coordinates
// using the rotation described by r00/r01; the unrolled arithmetic lives in
// RotateResidualsOnTangentPlane_impl (KalmanUtilsMPlex.icc), applied to all
// NN slots.
 231  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00, //r00
 232  const MPlexQF& R01, //r01
 233  const MPlexHV& A, //res_glo
 234  MPlex2V& B) //res_loc
 235  {
 236  RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);
 237  }
238 
239  //==============================================================================
240 
// Plane version of the covariance projection, first half:
// C (2x3) = A (2x3 projection matrix) * B (3x3 sym covariance sum).
// Packed-index layouts of A, B and C are sketched in the comment below.
 241  inline void ProjectResErr(const MPlex2H& A, const MPlexHS& B, MPlex2H& C) {
 242  // C = A * B, C is 2x3, A is 2x3 , B is 3x3 sym
 243 
 244  /*
 245  A 0 1 2
 246  3 4 5
 247  B 0 1 3
 248  1 2 4
 249  3 4 5
 250  */
 251 
 252  typedef float T;
 253  const idx_t N = NN;
 254 
 255  const T* a = A.fArray;
 256  ASSUME_ALIGNED(a, 64);
 257  const T* b = B.fArray;
 258  ASSUME_ALIGNED(b, 64);
 259  T* c = C.fArray;
 260  ASSUME_ALIGNED(c, 64);
 261 
 262 #pragma omp simd
 263  for (int n = 0; n < N; ++n) {
 264  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n];
 265  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n];
 266  c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n];
 267  c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n];
 268  c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
 269  c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n];
 270  }
 271  }
272 
// Plane version of the covariance projection, second half:
// C (2x2 sym) = B (2x3 intermediate from ProjectResErr) * A^T (3x2).
// Only the three independent entries of the symmetric result are computed.
 273  inline void ProjectResErrTransp(const MPlex2H& A, const MPlex2H& B, MPlex2S& C) {
 274  // C = B * A^T, C is 2x2 sym, A is 2x3 (A^T is 3x2), B is 2x3
 275 
 276  /*
 277  B 0 1 2
 278  3 4 5
 279  A^T 0 3
 280  1 4
 281  2 5
 282  */
 283 
 284  typedef float T;
 285  const idx_t N = NN;
 286 
 287  const T* a = A.fArray;
 288  ASSUME_ALIGNED(a, 64);
 289  const T* b = B.fArray;
 290  ASSUME_ALIGNED(b, 64);
 291  T* c = C.fArray;
 292  ASSUME_ALIGNED(c, 64);
 293 
 294 #pragma omp simd
 295  for (int n = 0; n < N; ++n) {
 296  c[0 * N + n] = b[0 * N + n] * a[0 * N + n] + b[1 * N + n] * a[1 * N + n] + b[2 * N + n] * a[2 * N + n];
 297  c[1 * N + n] = b[0 * N + n] * a[3 * N + n] + b[1 * N + n] * a[4 * N + n] + b[2 * N + n] * a[5 * N + n];
 298  c[2 * N + n] = b[3 * N + n] * a[3 * N + n] + b[4 * N + n] * a[4 * N + n] + b[5 * N + n] * a[5 * N + n];
 299  }
 300  }
301 
// B (2-vec local residual) = R (2x3 projection) * A (3-vec global residual),
// written with Matriplex element accessors rather than raw-array indexing.
 302  inline void RotateResidualsOnPlane(const MPlex2H& R, //prj
 303  const MPlexHV& A, //res_glo
 304  MPlex2V& B) //res_loc
 305  {
// Raw-pointer setup kept for reference; the accessor form below compiles to
// the equivalent strided loads.
 306  // typedef float T;
 307  // const idx_t N = NN;
 308 
 309  // const T* a = A.fArray;
 310  // ASSUME_ALIGNED(a, 64);
 311  // T* b = B.fArray;
 312  // ASSUME_ALIGNED(b, 64);
 313  // const T* r = R.fArray;
 314  // ASSUME_ALIGNED(r, 64);
 315 
 316 #pragma omp simd
 317  for (int n = 0; n < NN; ++n) {
 318  B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
 319  B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
 320  }
 321  }
322 
// Barrel intermediate for the Kalman gain: C = H^T * G, where H^T is the
// rotation transpose (encoded by a00/a01) and G = B is the inverted 2x2
// local residual error. The third column of C is explicitly zeroed since
// the projected-out coordinate carries no information.
 323  inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
 324  // HTG = rot * res_loc
 325  // C = A * B
 326 
 327  // Based on script generation and adapted to custom sizes.
 328 
 329  typedef float T;
 330  const idx_t N = NN;
 331 
 332  const T* a00 = A00.fArray;
 333  ASSUME_ALIGNED(a00, 64);
 334  const T* a01 = A01.fArray;
 335  ASSUME_ALIGNED(a01, 64);
 336  const T* b = B.fArray;
 337  ASSUME_ALIGNED(b, 64);
 338  T* c = C.fArray;
 339  ASSUME_ALIGNED(c, 64);
 340 
 341 #pragma omp simd
 342  for (int n = 0; n < N; ++n) {
 343  c[0 * N + n] = a00[n] * b[0 * N + n];
 344  c[1 * N + n] = a00[n] * b[1 * N + n];
 345  c[2 * N + n] = 0.;
 346  c[3 * N + n] = a01[n] * b[0 * N + n];
 347  c[4 * N + n] = a01[n] * b[1 * N + n];
 348  c[5 * N + n] = 0.;
 349  c[6 * N + n] = b[1 * N + n];
 350  c[7 * N + n] = b[2 * N + n];
 351  c[8 * N + n] = 0.;
 352  }
 353  }
354 
// Kalman gain K = P * (H^T G): C (6x3) = A (6x6 sym covariance) * B (3x3 from
// KalmanHTG). B's third column is zero, so every third column entry of C is
// written as 0 directly. The a-indices follow the symmetric 6x6 packed layout
// (see the layout sketch in the MPlexH2 KalmanGain overload below).
 355  inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
 356  // C = A * B, C is 6x3, A is 6x6 sym , B is 3x3
 357 
 358  typedef float T;
 359  const idx_t N = NN;
 360 
 361  const T* a = A.fArray;
 362  ASSUME_ALIGNED(a, 64);
 363  const T* b = B.fArray;
 364  ASSUME_ALIGNED(b, 64);
 365  T* c = C.fArray;
 366  ASSUME_ALIGNED(c, 64);
 367 
 368 #pragma omp simd
 369  for (int n = 0; n < N; ++n) {
 370  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[6 * N + n];
 371  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[3 * N + n] * b[7 * N + n];
 372  c[2 * N + n] = 0;
 373  c[3 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[6 * N + n];
 374  c[4 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[4 * N + n] + a[4 * N + n] * b[7 * N + n];
 375  c[5 * N + n] = 0;
 376  c[6 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
 377  c[7 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
 378  c[8 * N + n] = 0;
 379  c[9 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
 380  c[10 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
 381  c[11 * N + n] = 0;
 382  c[12 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[6 * N + n];
 383  c[13 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[4 * N + n] + a[12 * N + n] * b[7 * N + n];
 384  c[14 * N + n] = 0;
 385  c[15 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[6 * N + n];
 386  c[16 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[4 * N + n] + a[17 * N + n] * b[7 * N + n];
 387  c[17 * N + n] = 0;
 388  }
 389  }
390 
// Plane intermediate for the Kalman gain: C (3x2) = A^T (3x2, transpose of
// the 2x3 projection) * B (2x2 sym inverted residual error).
// Packed layouts are sketched in the comment below.
 391  inline void KalmanHTG(const MPlex2H& A, const MPlex2S& B, MPlexH2& C) {
 392  // HTG = prj^T * res_loc
 393  // C = A^T * B
 394 
 395  /*
 396  A^T 0 3
 397  1 4
 398  2 5
 399  B 0 1
 400  1 2
 401  C 0 1
 402  2 3
 403  4 5
 404  */
 405 
 406  typedef float T;
 407  const idx_t N = NN;
 408 
 409  const T* a = A.fArray;
 410  ASSUME_ALIGNED(a, 64);
 411  const T* b = B.fArray;
 412  ASSUME_ALIGNED(b, 64);
 413  T* c = C.fArray;
 414  ASSUME_ALIGNED(c, 64);
 415 
 416 #pragma omp simd
 417  for (int n = 0; n < N; ++n) {
 418  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[3 * N + n] * b[1 * N + n];
 419  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[3 * N + n] * b[2 * N + n];
 420  c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n];
 421  c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n];
 422  c[4 * N + n] = a[2 * N + n] * b[0 * N + n] + a[5 * N + n] * b[1 * N + n];
 423  c[5 * N + n] = a[2 * N + n] * b[1 * N + n] + a[5 * N + n] * b[2 * N + n];
 424  }
 425  }
426 
// Plane Kalman gain: C (6x2) = A (6x6 sym covariance) * B (effectively 6x2,
// but its lower 3x2 half is identically zero and therefore not stored).
// Only the top three rows of B contribute; index maps are sketched below.
 427  inline void KalmanGain(const MPlexLS& A, const MPlexH2& B, MPlexL2& C) {
 428  // C = A * B, C is 6x2, A is 6x6 sym , B is 3x2 (6x2 but half of it is zeros)
 429 
 430  /*
 431  A 0 1 3 6 10 15
 432  1 2 4 7 11 16
 433  3 4 5 8 12 17
 434  6 7 8 9 13 18
 435  10 11 12 13 14 19
 436  15 16 17 18 19 20
 437  B 0 1
 438  2 3
 439  4 5
 440  X X with X=0, so not even included in B
 441  X X
 442  X X
 443  C 0 1
 444  2 3
 445  4 5
 446  6 7
 447  8 9
 448  10 11
 449  */
 450 
 451  typedef float T;
 452  const idx_t N = NN;
 453 
 454  const T* a = A.fArray;
 455  ASSUME_ALIGNED(a, 64);
 456  const T* b = B.fArray;
 457  ASSUME_ALIGNED(b, 64);
 458  T* c = C.fArray;
 459  ASSUME_ALIGNED(c, 64);
 460 
 461 #pragma omp simd
 462  for (int n = 0; n < N; ++n) {
 463  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[2 * N + n] + a[3 * N + n] * b[4 * N + n];
 464  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[5 * N + n];
 465  c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[2 * N + n] + a[4 * N + n] * b[4 * N + n];
 466  c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[5 * N + n];
 467  c[4 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
 468  c[5 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[5 * N + n];
 469  c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n];
 470  c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[5 * N + n];
 471  c[8 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[2 * N + n] + a[12 * N + n] * b[4 * N + n];
 472  c[9 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[5 * N + n];
 473  c[10 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[2 * N + n] + a[17 * N + n] * b[4 * N + n];
 474  c[11 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[5 * N + n];
 475  }
 476  }
477 
// Constrain the x-y covariance block along the rotated (local) frame:
// the input covariance Ci is rotated by R, the cross terms to the projected
// direction suppressed, and the result rotated back into Co. Each packed
// entry is computed in place, reusing co[] as scratch to avoid temporaries
// (hence the "loopy" write-then-overwrite sequences below — order matters).
// Only the rows/columns touching x,y are rewritten; others are not written.
 478  inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) {
 479  // C is transformed to align along y after rotation and rotated back
 480 
 481  typedef float T;
 482  const idx_t N = NN;
 483 
 484  const T* r00 = R00.fArray;
 485  ASSUME_ALIGNED(r00, 64);
 486  const T* r01 = R01.fArray;
 487  ASSUME_ALIGNED(r01, 64);
 488  const T* ci = Ci.fArray;
 489  ASSUME_ALIGNED(ci, 64);
 490  T* co = Co.fArray;
 491  ASSUME_ALIGNED(co, 64);
 492 
 493 #pragma omp simd
 494  for (int n = 0; n < N; ++n) {
 495  // a bit loopy to avoid temporaries
 496  co[0 * N + n] =
 497  r00[n] * r00[n] * ci[0 * N + n] + 2 * r00[n] * r01[n] * ci[1 * N + n] + r01[n] * r01[n] * ci[2 * N + n];
 498  co[1 * N + n] = r00[n] * r01[n] * co[0 * N + n];
 499  co[2 * N + n] = r01[n] * r01[n] * co[0 * N + n];
 500  co[0 * N + n] = r00[n] * r00[n] * co[0 * N + n];
 501 
 502  co[3 * N + n] = r00[n] * ci[3 * N + n] + r01[n] * ci[4 * N + n];
 503  co[4 * N + n] = r01[n] * co[3 * N + n];
 504  co[3 * N + n] = r00[n] * co[3 * N + n];
 505 
 506  co[6 * N + n] = r00[n] * ci[6 * N + n] + r01[n] * ci[7 * N + n];
 507  co[7 * N + n] = r01[n] * co[6 * N + n];
 508  co[6 * N + n] = r00[n] * co[6 * N + n];
 509 
 510  co[10 * N + n] = r00[n] * ci[10 * N + n] + r01[n] * ci[11 * N + n];
 511  co[11 * N + n] = r01[n] * co[10 * N + n];
 512  co[10 * N + n] = r00[n] * co[10 * N + n];
 513 
 514  co[15 * N + n] = r00[n] * ci[15 * N + n] + r01[n] * ci[16 * N + n];
 515  co[16 * N + n] = r01[n] * co[15 * N + n];
 516  co[15 * N + n] = r00[n] * co[15 * N + n];
 517  }
 518  }
519 
// Endcap/2x2 Kalman gain: C (6x2) = A (6x6 sym) * B (2x2 sym).
// The unrolled multiply is machine-generated and textually included from
// KalmanGain62.ah; it consumes the a/b/c pointers and N set up here.
 520  void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
 521  // C = A * B, C is 6x2, A is 6x6 sym , B is 2x2
 522 
 523  typedef float T;
 524  const idx_t N = NN;
 525 
 526  const T* a = A.fArray;
 527  ASSUME_ALIGNED(a, 64);
 528  const T* b = B.fArray;
 529  ASSUME_ALIGNED(b, 64);
 530  T* c = C.fArray;
 531  ASSUME_ALIGNED(c, 64);
 532 
 533 #include "KalmanGain62.ah"
 534  }
535 
// K*H product for the barrel update: C (6x6) = A (6x3 gain) * B (3x6
// rotation, encoded only by its b00/b01 entries). Implemented in
// KHMult_imp (KalmanUtilsMPlex.icc), applied to all NN slots.
 536  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
 537  // C = A * B, C is 6x6, A is 6x3 , B is 3x6
 538  KHMult_imp(A, B00, B01, C, 0, NN);
 539  }
540 
// K*H product for the plane update: C (6x6) = A (6x2 gain) * B (2x3
// projection; columns 3-5 of the conceptual 2x6 are zero and not stored),
// so every second half-row of C is written as zero directly.
// NOTE(review): writes go through C(n, 0, k) with k up to 35 — this relies on
// the Matriplex accessor flattening (i,j) row-major so that (0,k) addresses
// the k-th packed element of the 6x6; confirm against the MPlexLL accessor.
 541  inline void KHMult(const MPlexL2& A, const MPlex2H& B, MPlexLL& C) {
 542  // C = A * B, C is 6x6, A is 6x2 , B is 2x3 (2x6 but half of it made of zeros)
 543 
 544  /*
 545  A 0 1
 546  2 3
 547  4 5
 548  6 7
 549  8 9
 550  10 11
 551  B 0 1 2 X X X with X=0 so not included in B
 552  3 4 5 X X X
 553  C 0 1 2 3 4 5
 554  6 7 8 9 10 11
 555  12 13 14 15 16 17
 556  18 19 20 21 22 23
 557  24 25 26 27 28 29
 558  30 31 32 33 34 35
 559  */
 560 
 561  // typedef float T;
 562  // const idx_t N = NN;
 563 
 564  // const T* a = A.fArray;
 565  // ASSUME_ALIGNED(a, 64);
 566  // const T* b = B.fArray;
 567  // ASSUME_ALIGNED(b, 64);
 568  // T* c = C.fArray;
 569  // ASSUME_ALIGNED(c, 64);
 570 
 571 #pragma omp simd
 572  for (int n = 0; n < NN; ++n) {
 573  C(n, 0, 0) = A(n, 0, 0) * B(n, 0, 0) + A(n, 0, 1) * B(n, 1, 0);
 574  C(n, 0, 1) = A(n, 0, 0) * B(n, 0, 1) + A(n, 0, 1) * B(n, 1, 1);
 575  C(n, 0, 2) = A(n, 0, 0) * B(n, 0, 2) + A(n, 0, 1) * B(n, 1, 2);
 576  C(n, 0, 3) = 0;
 577  C(n, 0, 4) = 0;
 578  C(n, 0, 5) = 0;
 579  C(n, 0, 6) = A(n, 1, 0) * B(n, 0, 0) + A(n, 1, 1) * B(n, 1, 0);
 580  C(n, 0, 7) = A(n, 1, 0) * B(n, 0, 1) + A(n, 1, 1) * B(n, 1, 1);
 581  C(n, 0, 8) = A(n, 1, 0) * B(n, 0, 2) + A(n, 1, 1) * B(n, 1, 2);
 582  C(n, 0, 9) = 0;
 583  C(n, 0, 10) = 0;
 584  C(n, 0, 11) = 0;
 585  C(n, 0, 12) = A(n, 2, 0) * B(n, 0, 0) + A(n, 2, 1) * B(n, 1, 0);
 586  C(n, 0, 13) = A(n, 2, 0) * B(n, 0, 1) + A(n, 2, 1) * B(n, 1, 1);
 587  C(n, 0, 14) = A(n, 2, 0) * B(n, 0, 2) + A(n, 2, 1) * B(n, 1, 2);
 588  C(n, 0, 15) = 0;
 589  C(n, 0, 16) = 0;
 590  C(n, 0, 17) = 0;
 591  C(n, 0, 18) = A(n, 3, 0) * B(n, 0, 0) + A(n, 3, 1) * B(n, 1, 0);
 592  C(n, 0, 19) = A(n, 3, 0) * B(n, 0, 1) + A(n, 3, 1) * B(n, 1, 1);
 593  C(n, 0, 20) = A(n, 3, 0) * B(n, 0, 2) + A(n, 3, 1) * B(n, 1, 2);
 594  C(n, 0, 21) = 0;
 595  C(n, 0, 22) = 0;
 596  C(n, 0, 23) = 0;
 597  C(n, 0, 24) = A(n, 4, 0) * B(n, 0, 0) + A(n, 4, 1) * B(n, 1, 0);
 598  C(n, 0, 25) = A(n, 4, 0) * B(n, 0, 1) + A(n, 4, 1) * B(n, 1, 1);
 599  C(n, 0, 26) = A(n, 4, 0) * B(n, 0, 2) + A(n, 4, 1) * B(n, 1, 2);
 600  C(n, 0, 27) = 0;
 601  C(n, 0, 28) = 0;
 602  C(n, 0, 29) = 0;
 603  C(n, 0, 30) = A(n, 5, 0) * B(n, 0, 0) + A(n, 5, 1) * B(n, 1, 0);
 604  C(n, 0, 31) = A(n, 5, 0) * B(n, 0, 1) + A(n, 5, 1) * B(n, 1, 1);
 605  C(n, 0, 32) = A(n, 5, 0) * B(n, 0, 2) + A(n, 5, 1) * B(n, 1, 2);
 606  C(n, 0, 33) = 0;
 607  C(n, 0, 34) = 0;
 608  C(n, 0, 35) = 0;
 609  }
 610  }
611 
// (K*H) * C for the barrel update: C (6x6 sym) = A (6x6 dense) * B (6x6 sym).
// The unrolled multiply is machine-generated and textually included from
// KHC.ah; it consumes the a/b/c pointers and N set up here.
 612  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
 613  // C = A * B, C is 6x6, A is 6x6 , B is 6x6 sym
 614 
 615  typedef float T;
 616  const idx_t N = NN;
 617 
 618  const T* a = A.fArray;
 619  ASSUME_ALIGNED(a, 64);
 620  const T* b = B.fArray;
 621  ASSUME_ALIGNED(b, 64);
 622  T* c = C.fArray;
 623  ASSUME_ALIGNED(c, 64);
 624 
 625 #include "KHC.ah"
 626  }
627 
// (K*H) * C for the 6x2-gain (endcap/plane) update:
// C (6x6 sym) = A (6x2 gain) * B (6x6 sym). Machine-generated kernel
// textually included from K62HC.ah, consuming the pointers set up here.
 628  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
 629  // C = A * B, C is 6x6 sym, A is 6x2 , B is 6x6 sym
 630 
 631  typedef float T;
 632  const idx_t N = NN;
 633 
 634  const T* a = A.fArray;
 635  ASSUME_ALIGNED(a, 64);
 636  const T* b = B.fArray;
 637  ASSUME_ALIGNED(b, 64);
 638  T* c = C.fArray;
 639  ASSUME_ALIGNED(c, 64);
 640 
 641 #include "K62HC.ah"
 642  }
643 
644  //Warning: MultFull is not vectorized!
645  template <typename T1, typename T2, typename T3>
646  void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
647 #ifdef DEBUG
648  assert(nja == nib);
649  assert(nia == nic);
650  assert(njb == njc);
651 #endif
652  for (int n = 0; n < NN; ++n) {
653  for (int i = 0; i < nia; ++i) {
654  for (int j = 0; j < njb; ++j) {
655  C(n, i, j) = 0.;
656  for (int k = 0; k < nja; ++k)
657  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
658  }
659  }
660  }
661  }
662 
663  //Warning: MultTranspFull is not vectorized!
664  // (careful about which one is transposed, I think rows and cols are swapped and the one that is transposed is A)
665  template <typename T1, typename T2, typename T3>
666  void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
667 #ifdef DEBUG
668  assert(nja == njb);
669  assert(nia == nic);
670  assert(nib == njc);
671 #endif
672  for (int n = 0; n < NN; ++n) {
673  for (int i = 0; i < nia; ++i) {
674  for (int j = 0; j < nib; ++j) {
675  C(n, i, j) = 0.;
676  for (int k = 0; k < nja; ++k)
677  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, j, k);
678  }
679  }
680  }
681  }
682 
683 } // namespace
684 
685 //==============================================================================
686 // Kalman operations - common dummy variables
687 //==============================================================================
688 
 689 namespace {
 690  // Dummy variables for parameter consistency to kalmanOperation.
 691  // Through KalmanFilterOperation enum parameter it is guaranteed that
 692  // those will never get accessed in the code (read from or written into).
 693 
// NOTE(review): CMS_SA_ALLOW presumably suppresses static-analysis warnings
// about mutable file-scope globals — confirm against the CMSSW macro definition.
 694  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;
 695  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;
 696  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;
 697 } // namespace
698 
699 namespace mkfit {
700 
701  //==============================================================================
702  // Kalman operations - Barrel
703  //==============================================================================
704 
// Public entry: barrel Kalman update of parameters and covariance, no
// propagation. Delegates to kalmanOperation with KFO_Update_Params and
// KFO_Local_Cov; chi2 output is routed to the never-read dummy.
 705  void kalmanUpdate(const MPlexLS& psErr,
 706  const MPlexLV& psPar,
 707  const MPlexHS& msErr,
 708  const MPlexHV& msPar,
 709  MPlexLS& outErr,
 710  MPlexLV& outPar,
 711  const int N_proc) {
 712  kalmanOperation(KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
 713  }
714 
716  const MPlexLV& psPar,
717  MPlexQI& Chg,
718  const MPlexHS& msErr,
719  const MPlexHV& msPar,
720  MPlexLS& outErr,
721  MPlexLV& outPar,
722  MPlexQI& outFailFlag,
723  const int N_proc,
724  const PropagationFlags& propFlags,
725  const bool propToHit) {
726  if (propToHit) {
727  MPlexLS propErr;
728  MPlexLV propPar;
729  MPlexQF msRad;
730 #pragma omp simd
731  for (int n = 0; n < NN; ++n) {
732  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
733  }
734 
735  propagateHelixToRMPlex(psErr, psPar, Chg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
736 
738  KFO_Update_Params | KFO_Local_Cov, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
739  } else {
741  KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
742  }
743  for (int n = 0; n < NN; ++n) {
744  if (outPar.At(n, 3, 0) < 0) {
745  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
746  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
747  }
748  }
749  }
750 
751  //------------------------------------------------------------------------------
752 
// Public entry: barrel chi2 computation only, no propagation and no state
// update (error/parameter outputs are routed to the never-read dummies).
// NOTE(review): inChg is accepted for signature symmetry but not used here.
 753  void kalmanComputeChi2(const MPlexLS& psErr,
 754  const MPlexLV& psPar,
 755  const MPlexQI& inChg,
 756  const MPlexHS& msErr,
 757  const MPlexHV& msPar,
 758  MPlexQF& outChi2,
 759  const int N_proc) {
 760  kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
 761  }
762 
764  const MPlexLV& psPar,
765  const MPlexQI& inChg,
766  const MPlexHS& msErr,
767  const MPlexHV& msPar,
768  MPlexQF& outChi2,
769  MPlexLV& propPar,
770  MPlexQI& outFailFlag,
771  const int N_proc,
772  const PropagationFlags& propFlags,
773  const bool propToHit) {
774  propPar = psPar;
775  if (propToHit) {
776  MPlexLS propErr;
777  MPlexQF msRad;
778 #pragma omp simd
779  for (int n = 0; n < NN; ++n) {
780  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
781  }
782 
783  propagateHelixToRMPlex(psErr, psPar, inChg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
784 
785  kalmanOperation(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
786  } else {
787  kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
788  }
789  }
790 
791  //------------------------------------------------------------------------------
792 
// Core barrel Kalman routine. Depending on the kfOp bit-mask it computes the
// chi2 increment (KFO_Calculate_Chi2) and/or the updated track parameters and
// covariance (KFO_Update_Params, optionally constraining the covariance in
// the local frame via KFO_Local_Cov). The measurement is treated as a 2D
// position on the tangent plane of the cylinder at the hit radius.
 793  void kalmanOperation(const int kfOp,
 794  const MPlexLS& psErr,
 795  const MPlexLV& psPar,
 796  const MPlexHS& msErr,
 797  const MPlexHV& msPar,
 798  MPlexLS& outErr,
 799  MPlexLV& outPar,
 800  MPlexQF& outChi2,
 801  const int N_proc) {
 802 #ifdef DEBUG
 803  {
 804  dmutex_guard;
 805  printf("psPar:\n");
 806  for (int i = 0; i < 6; ++i) {
 807  printf("%8f ", psPar.constAt(0, 0, i));
 808  printf("\n");
 809  }
 810  printf("\n");
 811  printf("psErr:\n");
 812  for (int i = 0; i < 6; ++i) {
 813  for (int j = 0; j < 6; ++j)
 814  printf("%8f ", psErr.constAt(0, i, j));
 815  printf("\n");
 816  }
 817  printf("\n");
 818  printf("msPar:\n");
 819  for (int i = 0; i < 3; ++i) {
 820  printf("%8f ", msPar.constAt(0, 0, i));
 821  printf("\n");
 822  }
 823  printf("\n");
 824  printf("msErr:\n");
 825  for (int i = 0; i < 3; ++i) {
 826  for (int j = 0; j < 3; ++j)
 827  printf("%8f ", msErr.constAt(0, i, j));
 828  printf("\n");
 829  }
 830  printf("\n");
 831  }
 832 #endif
 833 
 834  // Rotate global point on tangent plane to cylinder
 835  // Tangent point is half way between hit and propagate position
 836 
 837  // Rotation matrix
 838  // rotT00 0 rotT01
 839  // rotT01 0 -rotT00
 840  // 0 1 0
 841  // Minimize temporaries: only two float are needed!
 842 
 843  MPlexQF rotT00;
 844  MPlexQF rotT01;
 844  for (int n = 0; n < NN; ++n) {
// r is the hit radius; the rotation direction is taken from the average of
// hit and propagated positions (the tangent point halfway between them).
 846  const float r = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
 847  rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
 848  rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
 849  }
 850 
 851  MPlexHV res_glo; //position residual in global coordinates
 852  SubtractFirst3(msPar, psPar, res_glo);
 853 
 854  MPlexHS resErr_glo; //covariance sum in global position coordinates
 855  AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);
 856 
 857  MPlex2V res_loc; //position residual in local coordinates
 858  RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
 859  MPlex2S resErr_loc; //covariance sum in local position coordinates
 860  MPlexHH tempHH;
 861  ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
 862  ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);
 863 
 864 #ifdef DEBUG
 865  {
 866  dmutex_guard;
 867  printf("res_glo:\n");
 868  for (int i = 0; i < 3; ++i) {
 869  printf("%8f ", res_glo.At(0, i, 0));
 870  }
 871  printf("\n");
 872  printf("resErr_glo:\n");
 873  for (int i = 0; i < 3; ++i) {
 874  for (int j = 0; j < 3; ++j)
 875  printf("%8f ", resErr_glo.At(0, i, j));
 876  printf("\n");
 877  }
 878  printf("\n");
 879  printf("res_loc:\n");
 880  for (int i = 0; i < 2; ++i) {
 881  printf("%8f ", res_loc.At(0, i, 0));
 882  }
 883  printf("\n");
 884  printf("tempHH:\n");
 885  for (int i = 0; i < 3; ++i) {
 886  for (int j = 0; j < 3; ++j)
 887  printf("%8f ", tempHH.At(0, i, j));
 888  printf("\n");
 889  }
 890  printf("\n");
 891  printf("resErr_loc:\n");
 892  for (int i = 0; i < 2; ++i) {
 893  for (int j = 0; j < 2; ++j)
 894  printf("%8f ", resErr_loc.At(0, i, j));
 895  printf("\n");
 896  }
 897  printf("\n");
 898  }
 899 #endif
 900 
 901  //invert the 2x2 matrix
// From here on resErr_loc holds the INVERSE of the local residual error,
// which is what both the chi2 and the gain computation consume.
 902  Matriplex::invertCramerSym(resErr_loc);
 903 
 904  if (kfOp & KFO_Calculate_Chi2) {
 905  Chi2Similarity(res_loc, resErr_loc, outChi2);
 906 
 907 #ifdef DEBUG
 908  {
 909  dmutex_guard;
 910  printf("resErr_loc (Inv):\n");
 911  for (int i = 0; i < 2; ++i) {
 912  for (int j = 0; j < 2; ++j)
 913  printf("%8f ", resErr_loc.At(0, i, j));
 914  printf("\n");
 915  }
 916  printf("\n");
 917  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
 918  }
 919 #endif
 920  }
 921 
 922  if (kfOp & KFO_Update_Params) {
 923  MPlexLS psErrLoc = psErr;
 924  if (kfOp & KFO_Local_Cov)
 925  CovXYconstrain(rotT00, rotT01, psErr, psErrLoc);
 926 
 927  MPlexLH K; // kalman gain, fixme should be L2
 928  KalmanHTG(rotT00, rotT01, resErr_loc, tempHH); // intermediate term to get kalman gain (H^T*G)
 929  KalmanGain(psErrLoc, tempHH, K);
 930 
 931  MultResidualsAdd(K, psPar, res_loc, outPar);
 932 
 933  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
 934 
// Covariance update: outErr = psErrLoc - (K*H)*psErrLoc.
 935  MPlexLL tempLL;
 936  KHMult(K, rotT00, rotT01, tempLL);
 937  KHC(tempLL, psErrLoc, outErr);
 938  outErr.subtract(psErrLoc, outErr);
 939 
 940 #ifdef DEBUG
 941  {
 942  dmutex_guard;
 943  if (kfOp & KFO_Local_Cov) {
 944  printf("psErrLoc:\n");
 945  for (int i = 0; i < 6; ++i) {
 946  for (int j = 0; j < 6; ++j)
 947  printf("% 8e ", psErrLoc.At(0, i, j));
 948  printf("\n");
 949  }
 950  printf("\n");
 951  }
 952  printf("resErr_loc (Inv):\n");
 953  for (int i = 0; i < 2; ++i) {
 954  for (int j = 0; j < 2; ++j)
 955  printf("%8f ", resErr_loc.At(0, i, j));
 956  printf("\n");
 957  }
 958  printf("\n");
 959  printf("tempHH:\n");
 960  for (int i = 0; i < 3; ++i) {
 961  for (int j = 0; j < 3; ++j)
 962  printf("%8f ", tempHH.At(0, i, j));
 963  printf("\n");
 964  }
 965  printf("\n");
 966  printf("K:\n");
 967  for (int i = 0; i < 6; ++i) {
 968  for (int j = 0; j < 3; ++j)
 969  printf("%8f ", K.At(0, i, j));
 970  printf("\n");
 971  }
 972  printf("\n");
 973  printf("tempLL:\n");
 974  for (int i = 0; i < 6; ++i) {
 975  for (int j = 0; j < 6; ++j)
 976  printf("%8f ", tempLL.At(0, i, j));
 977  printf("\n");
 978  }
 979  printf("\n");
 980  printf("outPar:\n");
 981  for (int i = 0; i < 6; ++i) {
 982  printf("%8f ", outPar.At(0, i, 0));
 983  }
 984  printf("\n");
 985  printf("outErr:\n");
 986  for (int i = 0; i < 6; ++i) {
 987  for (int j = 0; j < 6; ++j)
 988  printf("%8f ", outErr.At(0, i, j));
 989  printf("\n");
 990  }
 991  printf("\n");
 992  }
 993 #endif
 994  }
 995  }
996 
997  //==============================================================================
998  // Kalman operations - Plane
999  //==============================================================================
1000 
1001  void kalmanUpdatePlane(const MPlexLS& psErr,
1002  const MPlexLV& psPar,
1003  const MPlexHS& msErr,
1004  const MPlexHV& msPar,
1005  const MPlexHV& plNrm,
1006  const MPlexHV& plDir,
1007  MPlexLS& outErr,
1008  MPlexLV& outPar,
1009  const int N_proc) {
1011  KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, plNrm, plDir, outErr, outPar, dummy_chi2, N_proc);
1012  }
1013 
1015  const MPlexLV& psPar,
1016  MPlexQI& Chg,
1017  const MPlexHS& msErr,
1018  const MPlexHV& msPar,
1019  const MPlexHV& plNrm,
1020  const MPlexHV& plDir,
1021  MPlexLS& outErr,
1022  MPlexLV& outPar,
1023  MPlexQI& outFailFlag,
1024  const int N_proc,
1025  const PropagationFlags& propFlags,
1026  const bool propToHit) {
1027  if (propToHit) {
1028  MPlexLS propErr;
1029  MPlexLV propPar;
1030  propagateHelixToPlaneMPlex(psErr, psPar, Chg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1031 
1033  propErr,
1034  propPar,
1035  msErr,
1036  msPar,
1037  plNrm,
1038  plDir,
1039  outErr,
1040  outPar,
1041  dummy_chi2,
1042  N_proc);
1043  } else {
1045  psErr,
1046  psPar,
1047  msErr,
1048  msPar,
1049  plNrm,
1050  plDir,
1051  outErr,
1052  outPar,
1053  dummy_chi2,
1054  N_proc);
1055  }
1056  for (int n = 0; n < NN; ++n) {
1057  if (outPar.At(n, 3, 0) < 0) {
1058  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1059  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1060  }
1061  }
1062  }
1063 
1064  //------------------------------------------------------------------------------
1065 
1066  void kalmanComputeChi2Plane(const MPlexLS& psErr,
1067  const MPlexLV& psPar,
1068  const MPlexQI& inChg,
1069  const MPlexHS& msErr,
1070  const MPlexHV& msPar,
1071  const MPlexHV& plNrm,
1072  const MPlexHV& plDir,
1073  MPlexQF& outChi2,
1074  const int N_proc) {
1076  KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1077  }
1078 
1080  const MPlexLV& psPar,
1081  const MPlexQI& inChg,
1082  const MPlexHS& msErr,
1083  const MPlexHV& msPar,
1084  const MPlexHV& plNrm,
1085  const MPlexHV& plDir,
1086  MPlexQF& outChi2,
1087  MPlexLV& propPar,
1088  MPlexQI& outFailFlag,
1089  const int N_proc,
1090  const PropagationFlags& propFlags,
1091  const bool propToHit) {
1092  propPar = psPar;
1093  if (propToHit) {
1094  MPlexLS propErr;
1095  propagateHelixToPlaneMPlex(psErr, psPar, inChg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1096 
1098  KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1099  } else {
1101  KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc);
1102  }
1103  }
1104 
1105  //------------------------------------------------------------------------------
1106 
1107  void kalmanOperationPlane(const int kfOp,
1108  const MPlexLS& psErr,
1109  const MPlexLV& psPar,
1110  const MPlexHS& msErr,
1111  const MPlexHV& msPar,
1112  const MPlexHV& plNrm,
1113  const MPlexHV& plDir,
1114  MPlexLS& outErr,
1115  MPlexLV& outPar,
1116  MPlexQF& outChi2,
1117  const int N_proc) {
1118 #ifdef DEBUG
1119  {
1120  dmutex_guard;
1121  printf("psPar:\n");
1122  for (int i = 0; i < 6; ++i) {
1123  printf("%8f ", psPar.constAt(0, 0, i));
1124  printf("\n");
1125  }
1126  printf("\n");
1127  printf("psErr:\n");
1128  for (int i = 0; i < 6; ++i) {
1129  for (int j = 0; j < 6; ++j)
1130  printf("%8f ", psErr.constAt(0, i, j));
1131  printf("\n");
1132  }
1133  printf("\n");
1134  printf("msPar:\n");
1135  for (int i = 0; i < 3; ++i) {
1136  printf("%8f ", msPar.constAt(0, 0, i));
1137  printf("\n");
1138  }
1139  printf("\n");
1140  printf("msErr:\n");
1141  for (int i = 0; i < 3; ++i) {
1142  for (int j = 0; j < 3; ++j)
1143  printf("%8f ", msErr.constAt(0, i, j));
1144  printf("\n");
1145  }
1146  printf("\n");
1147  }
1148 #endif
1149 
1150  // Rotate global point on tangent plane to cylinder
1151  // Tangent point is half way between hit and propagate position
1152 
1153  // Rotation matrix
1154  // D0 D1 D2
1155  // X0 X1 X2
1156  // N0 N1 N2
1157  // where D is the strip direction vector plDir, N is the normal plNrm, and X is the cross product between the two
1158 
1159  MPlex2H prj;
1160  for (int n = 0; n < NN; ++n) {
1161  prj(n, 0, 0) = plDir(n, 0, 0);
1162  prj(n, 0, 1) = plDir(n, 1, 0);
1163  prj(n, 0, 2) = plDir(n, 2, 0);
1164  prj(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
1165  prj(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
1166  prj(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
1167  }
1168 
1169  MPlexHV res_glo; //position residual in global coordinates
1170  SubtractFirst3(msPar, psPar, res_glo);
1171 
1172  MPlexHS resErr_glo; //covariance sum in global position coordinates
1173  AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);
1174 
1175  MPlex2V res_loc; //position residual in local coordinates
1176  RotateResidualsOnPlane(prj, res_glo, res_loc);
1177  MPlex2S resErr_loc; //covariance sum in local position coordinates
1178  MPlex2H temp2H;
1179  ProjectResErr(prj, resErr_glo, temp2H);
1180  ProjectResErrTransp(prj, temp2H, resErr_loc);
1181 
1182 #ifdef DEBUG
1183  {
1184  dmutex_guard;
1185  printf("prj:\n");
1186  for (int i = 0; i < 2; ++i) {
1187  for (int j = 0; j < 3; ++j)
1188  printf("%8f ", prj.At(0, i, j));
1189  printf("\n");
1190  }
1191  printf("\n");
1192  printf("res_glo:\n");
1193  for (int i = 0; i < 3; ++i) {
1194  printf("%8f ", res_glo.At(0, i, 0));
1195  }
1196  printf("\n");
1197  printf("resErr_glo:\n");
1198  for (int i = 0; i < 3; ++i) {
1199  for (int j = 0; j < 3; ++j)
1200  printf("%8f ", resErr_glo.At(0, i, j));
1201  printf("\n");
1202  }
1203  printf("\n");
1204  printf("res_loc:\n");
1205  for (int i = 0; i < 2; ++i) {
1206  printf("%8f ", res_loc.At(0, i, 0));
1207  }
1208  printf("\n");
1209  printf("temp2H:\n");
1210  for (int i = 0; i < 2; ++i) {
1211  for (int j = 0; j < 3; ++j)
1212  printf("%8f ", temp2H.At(0, i, j));
1213  printf("\n");
1214  }
1215  printf("\n");
1216  printf("resErr_loc:\n");
1217  for (int i = 0; i < 2; ++i) {
1218  for (int j = 0; j < 2; ++j)
1219  printf("%8f ", resErr_loc.At(0, i, j));
1220  printf("\n");
1221  }
1222  printf("\n");
1223  }
1224 #endif
1225 
1226  //invert the 2x2 matrix
1227  Matriplex::invertCramerSym(resErr_loc);
1228 
1229  if (kfOp & KFO_Calculate_Chi2) {
1230  Chi2Similarity(res_loc, resErr_loc, outChi2);
1231 
1232 #ifdef DEBUG
1233  {
1234  dmutex_guard;
1235  printf("resErr_loc (Inv):\n");
1236  for (int i = 0; i < 2; ++i) {
1237  for (int j = 0; j < 2; ++j)
1238  printf("%8f ", resErr_loc.At(0, i, j));
1239  printf("\n");
1240  }
1241  printf("\n");
1242  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1243  }
1244 #endif
1245  }
1246 
1247  if (kfOp & KFO_Update_Params) {
1248  MPlexLS psErrLoc = psErr;
1249 
1250  MPlexH2 tempH2;
1251  MPlexL2 K; // kalman gain, fixme should be L2
1252  KalmanHTG(prj, resErr_loc, tempH2); // intermediate term to get kalman gain (H^T*G)
1253  KalmanGain(psErrLoc, tempH2, K);
1254 
1255  MultResidualsAdd(K, psPar, res_loc, outPar);
1256 
1257  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
1258 
1259  MPlexLL tempLL;
1260  KHMult(K, prj, tempLL);
1261  KHC(tempLL, psErrLoc, outErr);
1262  outErr.subtract(psErrLoc, outErr);
1263 
1264 #ifdef DEBUG
1265  {
1266  dmutex_guard;
1267  if (kfOp & KFO_Local_Cov) {
1268  printf("psErrLoc:\n");
1269  for (int i = 0; i < 6; ++i) {
1270  for (int j = 0; j < 6; ++j)
1271  printf("% 8e ", psErrLoc.At(0, i, j));
1272  printf("\n");
1273  }
1274  printf("\n");
1275  }
1276  printf("resErr_loc (Inv):\n");
1277  for (int i = 0; i < 2; ++i) {
1278  for (int j = 0; j < 2; ++j)
1279  printf("%8f ", resErr_loc.At(0, i, j));
1280  printf("\n");
1281  }
1282  printf("\n");
1283  printf("tempH2:\n");
1284  for (int i = 0; i < 3; ++i) {
1285  for (int j = 0; j < 2; ++j)
1286  printf("%8f ", tempH2.At(0, i, j));
1287  printf("\n");
1288  }
1289  printf("\n");
1290  printf("K:\n");
1291  for (int i = 0; i < 6; ++i) {
1292  for (int j = 0; j < 2; ++j)
1293  printf("%8f ", K.At(0, i, j));
1294  printf("\n");
1295  }
1296  printf("\n");
1297  printf("tempLL:\n");
1298  for (int i = 0; i < 6; ++i) {
1299  for (int j = 0; j < 6; ++j)
1300  printf("%8f ", tempLL.At(0, i, j));
1301  printf("\n");
1302  }
1303  printf("\n");
1304  printf("outPar:\n");
1305  for (int i = 0; i < 6; ++i) {
1306  printf("%8f ", outPar.At(0, i, 0));
1307  }
1308  printf("\n");
1309  printf("outErr:\n");
1310  for (int i = 0; i < 6; ++i) {
1311  for (int j = 0; j < 6; ++j)
1312  printf("%8f ", outErr.At(0, i, j));
1313  printf("\n");
1314  }
1315  printf("\n");
1316  }
1317 #endif
1318  }
1319  }
1320 
1321  //==============================================================================
1322  // Kalman operations - Endcap
1323  //==============================================================================
1324 
1325  void kalmanUpdateEndcap(const MPlexLS& psErr,
1326  const MPlexLV& psPar,
1327  const MPlexHS& msErr,
1328  const MPlexHV& msPar,
1329  MPlexLS& outErr,
1330  MPlexLV& outPar,
1331  const int N_proc) {
1332  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1333  }
1334 
1336  const MPlexLV& psPar,
1337  MPlexQI& Chg,
1338  const MPlexHS& msErr,
1339  const MPlexHV& msPar,
1340  MPlexLS& outErr,
1341  MPlexLV& outPar,
1342  MPlexQI& outFailFlag,
1343  const int N_proc,
1344  const PropagationFlags& propFlags,
1345  const bool propToHit) {
1346  if (propToHit) {
1347  MPlexLS propErr;
1348  MPlexLV propPar;
1349  MPlexQF msZ;
1350 #pragma omp simd
1351  for (int n = 0; n < NN; ++n) {
1352  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
1353  }
1354 
1355  propagateHelixToZMPlex(psErr, psPar, Chg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
1356 
1357  kalmanOperationEndcap(KFO_Update_Params, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1358  } else {
1359  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
1360  }
1361  for (int n = 0; n < NN; ++n) {
1362  if (outPar.At(n, 3, 0) < 0) {
1363  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1364  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1365  }
1366  }
1367  }
1368 
1369  //------------------------------------------------------------------------------
1370 
1372  const MPlexLV& psPar,
1373  const MPlexQI& inChg,
1374  const MPlexHS& msErr,
1375  const MPlexHV& msPar,
1376  MPlexQF& outChi2,
1377  const int N_proc) {
1378  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1379  }
1380 
1382  const MPlexLV& psPar,
1383  const MPlexQI& inChg,
1384  const MPlexHS& msErr,
1385  const MPlexHV& msPar,
1386  MPlexQF& outChi2,
1387  MPlexLV& propPar,
1388  MPlexQI& outFailFlag,
1389  const int N_proc,
1390  const PropagationFlags& propFlags,
1391  const bool propToHit) {
1392  propPar = psPar;
1393  if (propToHit) {
1394  MPlexLS propErr;
1395  MPlexQF msZ;
1396 #pragma omp simd
1397  for (int n = 0; n < NN; ++n) {
1398  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
1399  }
1400 
1401  propagateHelixToZMPlex(psErr, psPar, inChg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
1402 
1403  kalmanOperationEndcap(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1404  } else {
1405  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
1406  }
1407  }
1408 
1409  //------------------------------------------------------------------------------
1410 
1411  void kalmanOperationEndcap(const int kfOp,
1412  const MPlexLS& psErr,
1413  const MPlexLV& psPar,
1414  const MPlexHS& msErr,
1415  const MPlexHV& msPar,
1416  MPlexLS& outErr,
1417  MPlexLV& outPar,
1418  MPlexQF& outChi2,
1419  const int N_proc) {
1420 #ifdef DEBUG
1421  {
1422  dmutex_guard;
1423  printf("updateParametersEndcapMPlex\n");
1424  printf("psPar:\n");
1425  for (int i = 0; i < 6; ++i) {
1426  printf("%8f ", psPar.constAt(0, 0, i));
1427  printf("\n");
1428  }
1429  printf("\n");
1430  printf("msPar:\n");
1431  for (int i = 0; i < 3; ++i) {
1432  printf("%8f ", msPar.constAt(0, 0, i));
1433  printf("\n");
1434  }
1435  printf("\n");
1436  printf("psErr:\n");
1437  for (int i = 0; i < 6; ++i) {
1438  for (int j = 0; j < 6; ++j)
1439  printf("%8f ", psErr.constAt(0, i, j));
1440  printf("\n");
1441  }
1442  printf("\n");
1443  printf("msErr:\n");
1444  for (int i = 0; i < 3; ++i) {
1445  for (int j = 0; j < 3; ++j)
1446  printf("%8f ", msErr.constAt(0, i, j));
1447  printf("\n");
1448  }
1449  printf("\n");
1450  }
1451 #endif
1452 
1453  MPlex2V res;
1454  SubtractFirst2(msPar, psPar, res);
1455 
1456  MPlex2S resErr;
1457  AddIntoUpperLeft2x2(psErr, msErr, resErr);
1458 
1459 #ifdef DEBUG
1460  {
1461  dmutex_guard;
1462  printf("resErr:\n");
1463  for (int i = 0; i < 2; ++i) {
1464  for (int j = 0; j < 2; ++j)
1465  printf("%8f ", resErr.At(0, i, j));
1466  printf("\n");
1467  }
1468  printf("\n");
1469  }
1470 #endif
1471 
1472  //invert the 2x2 matrix
1474 
1475  if (kfOp & KFO_Calculate_Chi2) {
1476  Chi2Similarity(res, resErr, outChi2);
1477 
1478 #ifdef DEBUG
1479  {
1480  dmutex_guard;
1481  printf("resErr_loc (Inv):\n");
1482  for (int i = 0; i < 2; ++i) {
1483  for (int j = 0; j < 2; ++j)
1484  printf("%8f ", resErr.At(0, i, j));
1485  printf("\n");
1486  }
1487  printf("\n");
1488  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1489  }
1490 #endif
1491  }
1492 
1493  if (kfOp & KFO_Update_Params) {
1494  MPlexL2 K;
1495  KalmanGain(psErr, resErr, K);
1496 
1497  MultResidualsAdd(K, psPar, res, outPar);
1498 
1499  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
1500 
1501  KHC(K, psErr, outErr);
1502 
1503 #ifdef DEBUG
1504  {
1505  dmutex_guard;
1506  printf("outErr before subtract:\n");
1507  for (int i = 0; i < 6; ++i) {
1508  for (int j = 0; j < 6; ++j)
1509  printf("%8f ", outErr.At(0, i, j));
1510  printf("\n");
1511  }
1512  printf("\n");
1513  }
1514 #endif
1515 
1516  outErr.subtract(psErr, outErr);
1517 
1518 #ifdef DEBUG
1519  {
1520  dmutex_guard;
1521  printf("res:\n");
1522  for (int i = 0; i < 2; ++i) {
1523  printf("%8f ", res.At(0, i, 0));
1524  }
1525  printf("\n");
1526  printf("resErr (Inv):\n");
1527  for (int i = 0; i < 2; ++i) {
1528  for (int j = 0; j < 2; ++j)
1529  printf("%8f ", resErr.At(0, i, j));
1530  printf("\n");
1531  }
1532  printf("\n");
1533  printf("K:\n");
1534  for (int i = 0; i < 6; ++i) {
1535  for (int j = 0; j < 2; ++j)
1536  printf("%8f ", K.At(0, i, j));
1537  printf("\n");
1538  }
1539  printf("\n");
1540  printf("outPar:\n");
1541  for (int i = 0; i < 6; ++i) {
1542  printf("%8f ", outPar.At(0, i, 0));
1543  }
1544  printf("\n");
1545  printf("outErr:\n");
1546  for (int i = 0; i < 6; ++i) {
1547  for (int j = 0; j < 6; ++j)
1548  printf("%8f ", outErr.At(0, i, j));
1549  printf("\n");
1550  }
1551  printf("\n");
1552  }
1553 #endif
1554  }
1555  }
1556 
1557 } // end namespace mkfit
void propagateHelixToPlaneMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexHV &plPnt, const MPlexHV &plNrm, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Matriplex::Matriplex< float, HH, HH, NN > MPlexHH
Definition: Matrix.h:52
Matriplex::Matriplex< float, LL, LL, NN > MPlexLL
Definition: Matrix.h:48
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:436
#define CMS_SA_ALLOW
void propagateHelixToRMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msRad, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Definition: APVGainStruct.h:7
void kalmanOperationEndcap(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, HH, 1, NN > MPlexHV
Definition: Matrix.h:53
void kalmanUpdate(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanOperationPlane(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, LL, 2, NN > MPlexL2
Definition: Matrix.h:67
Matriplex::Matriplex< float, LL, 1, NN > MPlexLV
Definition: Matrix.h:49
void kalmanPropagateAndUpdate(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
assert(be >=bs)
__host__ __device__ VT * co
Definition: prefixScan.h:47
Definition: Electron.h:6
void squashPhiMPlex(MPlexLV &par, const int N_proc)
void kalmanOperation(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
void invertCramerSym(MPlexSym< T, D, N > &A, double *determ=nullptr)
Definition: MatriplexSym.h:418
void kalmanUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
constexpr Matriplex::idx_t NN
Definition: Matrix.h:43
void kalmanUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void kalmanPropagateAndUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
void kalmanPropagateAndUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
void kalmanPropagateAndComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< float, HH, 2, NN > MPlexH2
Definition: Matrix.h:68
d
Definition: ztail.py:151
void kalmanPropagateAndComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< int, 1, 1, NN > MPlexQI
Definition: Matrix.h:72
void kalmanPropagateAndComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< float, 2, HH, NN > MPlex2H
Definition: Matrix.h:69
void kalmanComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, 1, 1, NN > MPlexQF
Definition: Matrix.h:71
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
Matriplex::MatriplexSym< float, 2, NN > MPlex2S
Definition: Matrix.h:62
double b
Definition: hdecay.h:120
Matriplex::MatriplexSym< float, HH, NN > MPlexHS
Definition: Matrix.h:54
Matriplex::Matriplex< float, LL, HH, NN > MPlexLH
Definition: Matrix.h:64
Matriplex::Matriplex< float, 2, 1, NN > MPlex2V
Definition: Matrix.h:61
double a
Definition: hdecay.h:121
void propagateHelixToZMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msZ, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Matriplex::MatriplexSym< float, LL, NN > MPlexLS
Definition: Matrix.h:50
void kalmanComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
Definition: APVGainStruct.h:7
long double T
#define ASSUME_ALIGNED(a, b)