CMS 3D CMS Logo

KalmanUtilsMPlex.cc
Go to the documentation of this file.
1 #include "KalmanUtilsMPlex.h"
2 #include "PropagationMPlex.h"
3 
4 //#define DEBUG
5 #include "Debug.h"
6 
7 #include "KalmanUtilsMPlex.icc"
8 
10 
11 namespace {
12  using namespace mkfit;
13  using idx_t = Matriplex::idx_t;
14 
15  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
16  // outPar = psPar + kalmanGain*(dPar)
17  // D = B A C
18  // where right half of kalman gain is 0
19 
20  // XXX Regenerate with a script.
21 
22  MultResidualsAdd_imp(A, B, C, D, 0, NN);
23  }
24 
25  inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
26  // outPar = psPar + kalmanGain*(dPar)
27  // D = B A C
28 
29  // XXX Regenerate with a script.
30 
31  typedef float T;
32  const idx_t N = NN;
33 
34  const T* a = A.fArray;
35  ASSUME_ALIGNED(a, 64);
36  const T* b = B.fArray;
37  ASSUME_ALIGNED(b, 64);
38  const T* c = C.fArray;
39  ASSUME_ALIGNED(c, 64);
40  T* d = D.fArray;
41  ASSUME_ALIGNED(d, 64);
42 
43 #pragma omp simd
44  for (idx_t n = 0; n < N; ++n) {
45  // generate loop (can also write it manually this time, it's not much)
46  d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
47  d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
48  d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
49  d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
50  d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
51  d[5 * N + n] = b[5 * N + n] + a[10 * N + n] * c[0 * N + n] + a[11 * N + n] * c[1 * N + n];
52  }
53  }
54 
55  inline void MultResidualsAdd(const MPlex52& A, const MPlex5V& B, const MPlex2V& C, MPlex5V& D) {
56  // outPar = psPar + kalmanGain*(dPar)
57  // D = B A C
58 
59  // XXX Regenerate with a script.
60 
61  typedef float T;
62  const idx_t N = NN;
63 
64  const T* a = A.fArray;
65  ASSUME_ALIGNED(a, 64);
66  const T* b = B.fArray;
67  ASSUME_ALIGNED(b, 64);
68  const T* c = C.fArray;
69  ASSUME_ALIGNED(c, 64);
70  T* d = D.fArray;
71  ASSUME_ALIGNED(d, 64);
72 
73 #pragma omp simd
74  for (idx_t n = 0; n < N; ++n) {
75  // generate loop (can also write it manually this time, it's not much)
76  d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
77  d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
78  d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
79  d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
80  d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
81  }
82  }
83 
84  //------------------------------------------------------------------------------
85 
86  inline void Chi2Similarity(const MPlex2V& A, //resPar
87  const MPlex2S& C, //resErr
88  MPlexQF& D) //outChi2
89  {
90  // outChi2 = (resPar) * resErr * (resPar)
91  // D = A * C * A
92 
93  typedef float T;
94  const idx_t N = NN;
95 
96  const T* a = A.fArray;
97  ASSUME_ALIGNED(a, 64);
98  const T* c = C.fArray;
99  ASSUME_ALIGNED(c, 64);
100  T* d = D.fArray;
101  ASSUME_ALIGNED(d, 64);
102 
103 #pragma omp simd
104  for (idx_t n = 0; n < N; ++n) {
105  // generate loop (can also write it manually this time, it's not much)
106  d[0 * N + n] = c[0 * N + n] * a[0 * N + n] * a[0 * N + n] + c[2 * N + n] * a[1 * N + n] * a[1 * N + n] +
107  2 * (c[1 * N + n] * a[1 * N + n] * a[0 * N + n]);
108  }
109  }
110 
111  //------------------------------------------------------------------------------
112 
113  inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
114  // The rest of matrix is left untouched.
115 
116  typedef float T;
117  const idx_t N = NN;
118 
119  const T* a = A.fArray;
120  ASSUME_ALIGNED(a, 64);
121  const T* b = B.fArray;
122  ASSUME_ALIGNED(b, 64);
123  T* c = C.fArray;
124  ASSUME_ALIGNED(c, 64);
125 
126 #pragma omp simd
127  for (idx_t n = 0; n < N; ++n) {
128  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
129  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
130  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
131  c[3 * N + n] = a[3 * N + n] + b[3 * N + n];
132  c[4 * N + n] = a[4 * N + n] + b[4 * N + n];
133  c[5 * N + n] = a[5 * N + n] + b[5 * N + n];
134  }
135  }
136 
137  inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
138  // The rest of matrix is left untouched.
139 
140  typedef float T;
141  const idx_t N = NN;
142 
143  const T* a = A.fArray;
144  ASSUME_ALIGNED(a, 64);
145  const T* b = B.fArray;
146  ASSUME_ALIGNED(b, 64);
147  T* c = C.fArray;
148  ASSUME_ALIGNED(c, 64);
149 
150 #pragma omp simd
151  for (idx_t n = 0; n < N; ++n) {
152  c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
153  c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
154  c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
155  }
156  }
157 
158  //------------------------------------------------------------------------------
159 
160  inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
161  // The rest of matrix is left untouched.
162 
163  typedef float T;
164  const idx_t N = NN;
165 
166  const T* a = A.fArray;
167  ASSUME_ALIGNED(a, 64);
168  const T* b = B.fArray;
169  ASSUME_ALIGNED(b, 64);
170  T* c = C.fArray;
171  ASSUME_ALIGNED(c, 64);
172 
173 #pragma omp simd
174  for (idx_t n = 0; n < N; ++n) {
175  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
176  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
177  c[2 * N + n] = a[2 * N + n] - b[2 * N + n];
178  }
179  }
180 
181  inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
182  // The rest of matrix is left untouched.
183 
184  typedef float T;
185  const idx_t N = NN;
186 
187  const T* a = A.fArray;
188  ASSUME_ALIGNED(a, 64);
189  const T* b = B.fArray;
190  ASSUME_ALIGNED(b, 64);
191  T* c = C.fArray;
192  ASSUME_ALIGNED(c, 64);
193 
194 #pragma omp simd
195  for (idx_t n = 0; n < N; ++n) {
196  c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
197  c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
198  }
199  }
200 
201  //==============================================================================
202 
203  inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
204  // C = A * B, C is 3x3, A is 3x3 , B is 3x3 sym
205 
206  // Based on script generation and adapted to custom sizes.
207 
208  typedef float T;
209  const idx_t N = NN;
210 
211  const T* a00 = A00.fArray;
212  ASSUME_ALIGNED(a00, 64);
213  const T* a01 = A01.fArray;
214  ASSUME_ALIGNED(a01, 64);
215  const T* b = B.fArray;
216  ASSUME_ALIGNED(b, 64);
217  T* c = C.fArray;
218  ASSUME_ALIGNED(c, 64);
219 
220 #pragma omp simd
221  for (int n = 0; n < N; ++n) {
222  c[0 * N + n] = a00[n] * b[0 * N + n] + a01[n] * b[1 * N + n];
223  c[1 * N + n] = a00[n] * b[1 * N + n] + a01[n] * b[2 * N + n];
224  c[2 * N + n] = a00[n] * b[3 * N + n] + a01[n] * b[4 * N + n];
225  c[3 * N + n] = b[3 * N + n];
226  c[4 * N + n] = b[4 * N + n];
227  c[5 * N + n] = b[5 * N + n];
228  c[6 * N + n] = a01[n] * b[0 * N + n] - a00[n] * b[1 * N + n];
229  c[7 * N + n] = a01[n] * b[1 * N + n] - a00[n] * b[2 * N + n];
230  c[8 * N + n] = a01[n] * b[3 * N + n] - a00[n] * b[4 * N + n];
231  }
232  }
233 
234  inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
235  // C = A * B, C is 3x3 sym, A is 3x3 , B is 3x3
236 
237  // Based on script generation and adapted to custom sizes.
238 
239  typedef float T;
240  const idx_t N = NN;
241 
242  const T* a00 = A00.fArray;
243  ASSUME_ALIGNED(a00, 64);
244  const T* a01 = A01.fArray;
245  ASSUME_ALIGNED(a01, 64);
246  const T* b = B.fArray;
247  ASSUME_ALIGNED(b, 64);
248  T* c = C.fArray;
249  ASSUME_ALIGNED(c, 64);
250 
251 #pragma omp simd
252  for (int n = 0; n < N; ++n) {
253  c[0 * N + n] = b[0 * N + n] * a00[n] + b[1 * N + n] * a01[n];
254  c[1 * N + n] = b[3 * N + n] * a00[n] + b[4 * N + n] * a01[n];
255  c[2 * N + n] = b[5 * N + n];
256  }
257  }
258 
259  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00, //r00
260  const MPlexQF& R01, //r01
261  const MPlexHV& A, //res_glo
262  MPlex2V& B) //res_loc
263  {
264  RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);
265  }
266 
267  //==============================================================================
268 
269  /*
270  inline void ProjectResErr(const MPlex2H& A, const MPlexHS& B, MPlex2H& C) {
271  // C = A * B, C is 2x3, A is 2x3 , B is 3x3 sym
272 
273  //
274  // A 0 1 2
275  // 3 4 5
276  // B 0 1 3
277  // 1 2 4
278  // 3 4 5
279  //
280 
281  typedef float T;
282  const idx_t N = NN;
283 
284  const T* a = A.fArray;
285  ASSUME_ALIGNED(a, 64);
286  const T* b = B.fArray;
287  ASSUME_ALIGNED(b, 64);
288  T* c = C.fArray;
289  ASSUME_ALIGNED(c, 64);
290 
291 #pragma omp simd
292  for (int n = 0; n < N; ++n) {
293  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n];
294  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n];
295  c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n];
296  c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n];
297  c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
298  c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n];
299  }
300  }
301  */
302 
303  // inline void ProjectResErr(const MPlex2H& A, const MPlexLS& B, MPlex2H& C) {
304  template <class T1, class T2>
305  inline void ProjectResErr(const T1& A, const T2& B, MPlex2H& C) {
306  // C = A * B, C is 2x3, A is 2x3 , B is 3x3 sym
307 
308  /*
309  A 0 1 2
310  3 4 5
311  B 0 1 3
312  1 2 4
313  3 4 5
314  */
315 
316  typedef float T;
317  const idx_t N = NN;
318 
319  const T* a = A.fArray;
320  ASSUME_ALIGNED(a, 64);
321  const T* b = B.fArray;
322  ASSUME_ALIGNED(b, 64);
323  T* c = C.fArray;
324  ASSUME_ALIGNED(c, 64);
325 
326 #pragma omp simd
327  for (int n = 0; n < N; ++n) {
328  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n];
329  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n];
330  c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n];
331  c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n];
332  c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
333  c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n];
334  }
335  }
336 
337  //inline void ProjectResErrTransp(const MPlex2H& A, const MPlex2H& B, MPlex2S& C) {
338  template <class T1>
339  inline void ProjectResErrTransp(const T1& A, const MPlex2H& B, MPlex2S& C) {
340  // C = B * A^T, C is 2x2 sym, A is 2x3 (A^T is 3x2), B is 2x3
341 
342  /*
343  B 0 1 2
344  3 4 5
345  A^T 0 3
346  1 4
347  2 5
348  */
349 
350  typedef float T;
351  const idx_t N = NN;
352 
353  const T* a = A.fArray;
354  ASSUME_ALIGNED(a, 64);
355  const T* b = B.fArray;
356  ASSUME_ALIGNED(b, 64);
357  T* c = C.fArray;
358  ASSUME_ALIGNED(c, 64);
359 
360 #pragma omp simd
361  for (int n = 0; n < N; ++n) {
362  c[0 * N + n] = b[0 * N + n] * a[0 * N + n] + b[1 * N + n] * a[1 * N + n] + b[2 * N + n] * a[2 * N + n];
363  c[1 * N + n] = b[0 * N + n] * a[3 * N + n] + b[1 * N + n] * a[4 * N + n] + b[2 * N + n] * a[5 * N + n];
364  c[2 * N + n] = b[3 * N + n] * a[3 * N + n] + b[4 * N + n] * a[4 * N + n] + b[5 * N + n] * a[5 * N + n];
365  }
366  }
367 
368  inline void RotateVectorOnPlane(const MPlexHH& R, const MPlexHV& A, MPlexHV& B) {
369  // typedef float T;
370  // const idx_t N = NN;
371 
372  // const T* a = A.fArray;
373  // ASSUME_ALIGNED(a, 64);
374  // T* b = B.fArray;
375  // ASSUME_ALIGNED(b, 64);
376  // const T* r = R.fArray;
377  // ASSUME_ALIGNED(r, 64);
378 
379 #pragma omp simd
380  for (int n = 0; n < NN; ++n) {
381  B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
382  B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
383  B(n, 2, 0) = R(n, 2, 0) * A(n, 0, 0) + R(n, 2, 1) * A(n, 1, 0) + R(n, 2, 2) * A(n, 2, 0);
384  }
385  }
386 
387  inline void RotateVectorOnPlaneTransp(const MPlexHH& R, const MPlexHV& A, MPlexHV& B) {
388  // typedef float T;
389  // const idx_t N = NN;
390 
391  // const T* a = A.fArray;
392  // ASSUME_ALIGNED(a, 64);
393  // T* b = B.fArray;
394  // ASSUME_ALIGNED(b, 64);
395  // const T* r = R.fArray;
396  // ASSUME_ALIGNED(r, 64);
397 
398 #pragma omp simd
399  for (int n = 0; n < NN; ++n) {
400  B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 1, 0) * A(n, 1, 0) + R(n, 2, 0) * A(n, 2, 0);
401  B(n, 1, 0) = R(n, 0, 1) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 2, 1) * A(n, 2, 0);
402  B(n, 2, 0) = R(n, 0, 2) * A(n, 0, 0) + R(n, 1, 2) * A(n, 1, 0) + R(n, 2, 2) * A(n, 2, 0);
403  }
404  }
405 
406  template <typename T1, typename T2>
407  void RotateResidualsOnPlane(const T1& R, //prj - at least MPlex_2_3
408  const T2& A, //res_glo - at least MPlex_3_1 (vector)
409  MPlex2V& B) //res_loc - fixed as MPlex_2_1 (vector)
410  {
411 #pragma omp simd
412  for (int n = 0; n < NN; ++n) {
413  B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
414  B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
415  }
416  }
417 
418  inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
419  // HTG = rot * res_loc
420  // C = A * B
421 
422  // Based on script generation and adapted to custom sizes.
423 
424  typedef float T;
425  const idx_t N = NN;
426 
427  const T* a00 = A00.fArray;
428  ASSUME_ALIGNED(a00, 64);
429  const T* a01 = A01.fArray;
430  ASSUME_ALIGNED(a01, 64);
431  const T* b = B.fArray;
432  ASSUME_ALIGNED(b, 64);
433  T* c = C.fArray;
434  ASSUME_ALIGNED(c, 64);
435 
436 #pragma omp simd
437  for (int n = 0; n < N; ++n) {
438  c[0 * N + n] = a00[n] * b[0 * N + n];
439  c[1 * N + n] = a00[n] * b[1 * N + n];
440  c[2 * N + n] = 0.;
441  c[3 * N + n] = a01[n] * b[0 * N + n];
442  c[4 * N + n] = a01[n] * b[1 * N + n];
443  c[5 * N + n] = 0.;
444  c[6 * N + n] = b[1 * N + n];
445  c[7 * N + n] = b[2 * N + n];
446  c[8 * N + n] = 0.;
447  }
448  }
449 
450  inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
451  // C = A * B, C is 6x3, A is 6x6 sym , B is 3x3
452 
453  typedef float T;
454  const idx_t N = NN;
455 
456  const T* a = A.fArray;
457  ASSUME_ALIGNED(a, 64);
458  const T* b = B.fArray;
459  ASSUME_ALIGNED(b, 64);
460  T* c = C.fArray;
461  ASSUME_ALIGNED(c, 64);
462 
463 #pragma omp simd
464  for (int n = 0; n < N; ++n) {
465  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[6 * N + n];
466  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[3 * N + n] * b[7 * N + n];
467  c[2 * N + n] = 0;
468  c[3 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[6 * N + n];
469  c[4 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[4 * N + n] + a[4 * N + n] * b[7 * N + n];
470  c[5 * N + n] = 0;
471  c[6 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
472  c[7 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
473  c[8 * N + n] = 0;
474  c[9 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
475  c[10 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
476  c[11 * N + n] = 0;
477  c[12 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[6 * N + n];
478  c[13 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[4 * N + n] + a[12 * N + n] * b[7 * N + n];
479  c[14 * N + n] = 0;
480  c[15 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[6 * N + n];
481  c[16 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[4 * N + n] + a[17 * N + n] * b[7 * N + n];
482  c[17 * N + n] = 0;
483  }
484  }
485 
486  inline void KalmanHTG(const MPlex2H& A, const MPlex2S& B, MPlexH2& C) {
487  // HTG = prj^T * res_loc
488  // C = A^T * B
489 
490  /*
491  A^T 0 3
492  1 4
493  2 5
494  B 0 1
495  1 2
496  C 0 1
497  2 3
498  4 5
499  */
500 
501  typedef float T;
502  const idx_t N = NN;
503 
504  const T* a = A.fArray;
505  ASSUME_ALIGNED(a, 64);
506  const T* b = B.fArray;
507  ASSUME_ALIGNED(b, 64);
508  T* c = C.fArray;
509  ASSUME_ALIGNED(c, 64);
510 
511 #pragma omp simd
512  for (int n = 0; n < N; ++n) {
513  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[3 * N + n] * b[1 * N + n];
514  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[3 * N + n] * b[2 * N + n];
515  c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n];
516  c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n];
517  c[4 * N + n] = a[2 * N + n] * b[0 * N + n] + a[5 * N + n] * b[1 * N + n];
518  c[5 * N + n] = a[2 * N + n] * b[1 * N + n] + a[5 * N + n] * b[2 * N + n];
519  }
520  }
521 
522  inline void KalmanGain(const MPlexLS& A, const MPlexH2& B, MPlexL2& C) {
523  // C = A * B, C is 6x2, A is 6x6 sym , B is 3x2 (6x2 but half of it is zeros)
524 
525  /*
526  A 0 1 3 6 10 15
527  1 2 4 7 11 16
528  3 4 5 8 12 17
529  6 7 8 9 13 18
530  10 11 12 13 14 19
531  15 16 17 18 19 20
532  B 0 1
533  2 3
534  4 5
535  X X with X=0, so not even included in B
536  X X
537  X X
538  C 0 1
539  2 3
540  4 5
541  6 7
542  8 9
543  10 11
544  */
545 
546  typedef float T;
547  const idx_t N = NN;
548 
549  const T* a = A.fArray;
550  ASSUME_ALIGNED(a, 64);
551  const T* b = B.fArray;
552  ASSUME_ALIGNED(b, 64);
553  T* c = C.fArray;
554  ASSUME_ALIGNED(c, 64);
555 
556 #pragma omp simd
557  for (int n = 0; n < N; ++n) {
558  c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[2 * N + n] + a[3 * N + n] * b[4 * N + n];
559  c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[5 * N + n];
560  c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[2 * N + n] + a[4 * N + n] * b[4 * N + n];
561  c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[5 * N + n];
562  c[4 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
563  c[5 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[5 * N + n];
564  c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n];
565  c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[5 * N + n];
566  c[8 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[2 * N + n] + a[12 * N + n] * b[4 * N + n];
567  c[9 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[5 * N + n];
568  c[10 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[2 * N + n] + a[17 * N + n] * b[4 * N + n];
569  c[11 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[5 * N + n];
570  }
571  }
572 
573  inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) {
574  // C is transformed to align along y after rotation and rotated back
575 
576  typedef float T;
577  const idx_t N = NN;
578 
579  const T* r00 = R00.fArray;
580  ASSUME_ALIGNED(r00, 64);
581  const T* r01 = R01.fArray;
582  ASSUME_ALIGNED(r01, 64);
583  const T* ci = Ci.fArray;
584  ASSUME_ALIGNED(ci, 64);
585  T* co = Co.fArray;
586  ASSUME_ALIGNED(co, 64);
587 
588 #pragma omp simd
589  for (int n = 0; n < N; ++n) {
590  // a bit loopy to avoid temporaries
591  co[0 * N + n] =
592  r00[n] * r00[n] * ci[0 * N + n] + 2 * r00[n] * r01[n] * ci[1 * N + n] + r01[n] * r01[n] * ci[2 * N + n];
593  co[1 * N + n] = r00[n] * r01[n] * co[0 * N + n];
594  co[2 * N + n] = r01[n] * r01[n] * co[0 * N + n];
595  co[0 * N + n] = r00[n] * r00[n] * co[0 * N + n];
596 
597  co[3 * N + n] = r00[n] * ci[3 * N + n] + r01[n] * ci[4 * N + n];
598  co[4 * N + n] = r01[n] * co[3 * N + n];
599  co[3 * N + n] = r00[n] * co[3 * N + n];
600 
601  co[6 * N + n] = r00[n] * ci[6 * N + n] + r01[n] * ci[7 * N + n];
602  co[7 * N + n] = r01[n] * co[6 * N + n];
603  co[6 * N + n] = r00[n] * co[6 * N + n];
604 
605  co[10 * N + n] = r00[n] * ci[10 * N + n] + r01[n] * ci[11 * N + n];
606  co[11 * N + n] = r01[n] * co[10 * N + n];
607  co[10 * N + n] = r00[n] * co[10 * N + n];
608 
609  co[15 * N + n] = r00[n] * ci[15 * N + n] + r01[n] * ci[16 * N + n];
610  co[16 * N + n] = r01[n] * co[15 * N + n];
611  co[15 * N + n] = r00[n] * co[15 * N + n];
612  }
613  }
614 
615  void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
616  // C = A * B, C is 6x2, A is 6x6 sym , B is 2x2
617 
618  typedef float T;
619  const idx_t N = NN;
620 
621  const T* a = A.fArray;
622  ASSUME_ALIGNED(a, 64);
623  const T* b = B.fArray;
624  ASSUME_ALIGNED(b, 64);
625  T* c = C.fArray;
626  ASSUME_ALIGNED(c, 64);
627 
628 #include "KalmanGain62.ah"
629  }
630 
631  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
632  // C = A * B, C is 6x6, A is 6x3 , B is 3x6
633  KHMult_imp(A, B00, B01, C, 0, NN);
634  }
635 
636  inline void KHMult(const MPlexL2& A, const MPlex2H& B, MPlexLL& C) {
637  // C = A * B, C is 6x6, A is 6x2 , B is 2x3 (2x6 but half of it made of zeros)
638 
639  /*
640  A 0 1
641  2 3
642  4 5
643  6 7
644  8 9
645  10 11
646  B 0 1 2 X X X with X=0 so not included in B
647  3 4 5 X X X
648  C 0 1 2 3 4 5
649  6 7 8 9 10 11
650  12 13 14 15 16 17
651  18 19 20 21 22 23
652  24 25 26 27 28 29
653  30 31 32 33 34 34
654  */
655 
656  // typedef float T;
657  // const idx_t N = NN;
658 
659  // const T* a = A.fArray;
660  // ASSUME_ALIGNED(a, 64);
661  // const T* b = B.fArray;
662  // ASSUME_ALIGNED(b, 64);
663  // T* c = C.fArray;
664  // ASSUME_ALIGNED(c, 64);
665 
666 #pragma omp simd
667  for (int n = 0; n < NN; ++n) {
668  C(n, 0, 0) = A(n, 0, 0) * B(n, 0, 0) + A(n, 0, 1) * B(n, 1, 0);
669  C(n, 0, 1) = A(n, 0, 0) * B(n, 0, 1) + A(n, 0, 1) * B(n, 1, 1);
670  C(n, 0, 2) = A(n, 0, 0) * B(n, 0, 2) + A(n, 0, 1) * B(n, 1, 2);
671  C(n, 0, 3) = 0;
672  C(n, 0, 4) = 0;
673  C(n, 0, 5) = 0;
674  C(n, 0, 6) = A(n, 1, 0) * B(n, 0, 0) + A(n, 1, 1) * B(n, 1, 0);
675  C(n, 0, 7) = A(n, 1, 0) * B(n, 0, 1) + A(n, 1, 1) * B(n, 1, 1);
676  C(n, 0, 8) = A(n, 1, 0) * B(n, 0, 2) + A(n, 1, 1) * B(n, 1, 2);
677  C(n, 0, 9) = 0;
678  C(n, 0, 10) = 0;
679  C(n, 0, 11) = 0;
680  C(n, 0, 12) = A(n, 2, 0) * B(n, 0, 0) + A(n, 2, 1) * B(n, 1, 0);
681  C(n, 0, 13) = A(n, 2, 0) * B(n, 0, 1) + A(n, 2, 1) * B(n, 1, 1);
682  C(n, 0, 14) = A(n, 2, 0) * B(n, 0, 2) + A(n, 2, 1) * B(n, 1, 2);
683  C(n, 0, 15) = 0;
684  C(n, 0, 16) = 0;
685  C(n, 0, 17) = 0;
686  C(n, 0, 18) = A(n, 3, 0) * B(n, 0, 0) + A(n, 3, 1) * B(n, 1, 0);
687  C(n, 0, 19) = A(n, 3, 0) * B(n, 0, 1) + A(n, 3, 1) * B(n, 1, 1);
688  C(n, 0, 20) = A(n, 3, 0) * B(n, 0, 2) + A(n, 3, 1) * B(n, 1, 2);
689  C(n, 0, 21) = 0;
690  C(n, 0, 22) = 0;
691  C(n, 0, 23) = 0;
692  C(n, 0, 24) = A(n, 4, 0) * B(n, 0, 0) + A(n, 4, 1) * B(n, 1, 0);
693  C(n, 0, 25) = A(n, 4, 0) * B(n, 0, 1) + A(n, 4, 1) * B(n, 1, 1);
694  C(n, 0, 26) = A(n, 4, 0) * B(n, 0, 2) + A(n, 4, 1) * B(n, 1, 2);
695  C(n, 0, 27) = 0;
696  C(n, 0, 28) = 0;
697  C(n, 0, 29) = 0;
698  C(n, 0, 30) = A(n, 5, 0) * B(n, 0, 0) + A(n, 5, 1) * B(n, 1, 0);
699  C(n, 0, 31) = A(n, 5, 0) * B(n, 0, 1) + A(n, 5, 1) * B(n, 1, 1);
700  C(n, 0, 32) = A(n, 5, 0) * B(n, 0, 2) + A(n, 5, 1) * B(n, 1, 2);
701  C(n, 0, 33) = 0;
702  C(n, 0, 34) = 0;
703  C(n, 0, 35) = 0;
704  }
705  }
706 
707  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
708  // C = A * B, C is 6x6, A is 6x6 , B is 6x6 sym
709 
710  typedef float T;
711  const idx_t N = NN;
712 
713  const T* a = A.fArray;
714  ASSUME_ALIGNED(a, 64);
715  const T* b = B.fArray;
716  ASSUME_ALIGNED(b, 64);
717  T* c = C.fArray;
718  ASSUME_ALIGNED(c, 64);
719 
720 #include "KHC.ah"
721  }
722 
723  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
724  // C = A * B, C is 6x6 sym, A is 6x2 , B is 6x6 sym
725 
726  typedef float T;
727  const idx_t N = NN;
728 
729  const T* a = A.fArray;
730  ASSUME_ALIGNED(a, 64);
731  const T* b = B.fArray;
732  ASSUME_ALIGNED(b, 64);
733  T* c = C.fArray;
734  ASSUME_ALIGNED(c, 64);
735 
736 #include "K62HC.ah"
737  }
738 
739  void JacCCS2Loc(const MPlex55& A, const MPlex56& B, MPlex56& C) {
740  typedef float T;
741  const idx_t N = NN;
742 
743  const T* a = A.fArray;
744  ASSUME_ALIGNED(a, 64);
745  const T* b = B.fArray;
746  ASSUME_ALIGNED(b, 64);
747  T* c = C.fArray;
748  ASSUME_ALIGNED(c, 64);
749 
750 #include "JacCCS2Loc.ah"
751  }
752 
753  void PsErrLoc(const MPlex56& A, const MPlexLS& B, MPlex56& C) {
754  // C = A * B
755 
756  typedef float T;
757  const Matriplex::idx_t N = NN;
758 
759  const T* a = A.fArray;
760  ASSUME_ALIGNED(a, 64);
761  const T* b = B.fArray;
762  ASSUME_ALIGNED(b, 64);
763  T* c = C.fArray;
764  ASSUME_ALIGNED(c, 64);
765 
766 #include "PsErrLoc.ah"
767  }
768 
769  void PsErrLocTransp(const MPlex56& B, const MPlex56& A, MPlex5S& C) {
770  // C = B * AT;
771 
772  typedef float T;
773  const Matriplex::idx_t N = NN;
774 
775  const T* a = A.fArray;
776  ASSUME_ALIGNED(a, 64);
777  const T* b = B.fArray;
778  ASSUME_ALIGNED(b, 64);
779  T* c = C.fArray;
780  ASSUME_ALIGNED(c, 64);
781 
782 #include "PsErrLocTransp.ah"
783  }
784 
785  void PsErrLocUpd(const MPlex55& A, const MPlex5S& B, MPlex5S& C) {
786  // C = A * B;
787 
788  typedef float T;
789  const Matriplex::idx_t N = NN;
790 
791  const T* a = A.fArray;
792  ASSUME_ALIGNED(a, 64);
793  const T* b = B.fArray;
794  ASSUME_ALIGNED(b, 64);
795  T* c = C.fArray;
796  ASSUME_ALIGNED(c, 64);
797 
798 #include "PsErrLocUpd.ah"
799  }
800 
801  void JacLoc2CCS(const MPlex65& A, const MPlex55& B, MPlex65& C) {
802  // C = A * B;
803 
804  typedef float T;
805  const Matriplex::idx_t N = NN;
806 
807  const T* a = A.fArray;
808  ASSUME_ALIGNED(a, 64);
809  const T* b = B.fArray;
810  ASSUME_ALIGNED(b, 64);
811  T* c = C.fArray;
812  ASSUME_ALIGNED(c, 64);
813 
814 #include "JacLoc2CCS.ah"
815  }
816 
817  void OutErrCCS(const MPlex65& A, const MPlex5S& B, MPlex65& C) {
818  // C = A * B
819 
820  typedef float T;
821  const Matriplex::idx_t N = NN;
822 
823  const T* a = A.fArray;
824  ASSUME_ALIGNED(a, 64);
825  const T* b = B.fArray;
826  ASSUME_ALIGNED(b, 64);
827  T* c = C.fArray;
828  ASSUME_ALIGNED(c, 64);
829 
830 #include "OutErrCCS.ah"
831  }
832 
833  void OutErrCCSTransp(const MPlex65& B, const MPlex65& A, MPlexLS& C) {
834  // C = B * AT;
835 
836  typedef float T;
837  const Matriplex::idx_t N = NN;
838 
839  const T* a = A.fArray;
840  ASSUME_ALIGNED(a, 64);
841  const T* b = B.fArray;
842  ASSUME_ALIGNED(b, 64);
843  T* c = C.fArray;
844  ASSUME_ALIGNED(c, 64);
845 
846 #include "OutErrCCSTransp.ah"
847  }
848 
849  //Warning: MultFull is not vectorized!
850  template <typename T1, typename T2, typename T3>
851  void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
852 #ifdef DEBUG
853  assert(nja == nib);
854  assert(nia == nic);
855  assert(njb == njc);
856 #endif
857  for (int n = 0; n < NN; ++n) {
858  for (int i = 0; i < nia; ++i) {
859  for (int j = 0; j < njb; ++j) {
860  C(n, i, j) = 0.;
861  for (int k = 0; k < nja; ++k)
862  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
863  }
864  }
865  }
866  }
867 
868  //Warning: MultTranspFull is not vectorized!
869  // (careful about which one is transposed, I think rows and cols are swapped and the one that is transposed is B)
870  template <typename T1, typename T2, typename T3>
871  void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
872 #ifdef DEBUG
873  assert(nja == njb);
874  assert(nia == nic);
875  assert(nib == njc);
876 #endif
877  for (int n = 0; n < NN; ++n) {
878  for (int i = 0; i < nia; ++i) {
879  for (int j = 0; j < nib; ++j) {
880  C(n, i, j) = 0.;
881  for (int k = 0; k < nja; ++k)
882  C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, j, k);
883  }
884  }
885  }
886  }
887 
888 } // namespace
889 
890 //==============================================================================
891 // Kalman operations - common dummy variables
892 //==============================================================================
893 
894 namespace {
895  // Dummy variables for parameter consistency to kalmanOperation.
896  // Through KalmanFilterOperation enum parameter it is guaranteed that
897  // those will never get accessed in the code (read from or written into).
898 
899  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;
900  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;
901  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;
902 } // namespace
903 
904 namespace mkfit {
905 
906  //==============================================================================
907  // Kalman operations - Barrel
908  //==============================================================================
909 
910  void kalmanUpdate(const MPlexLS& psErr,
911  const MPlexLV& psPar,
912  const MPlexHS& msErr,
913  const MPlexHV& msPar,
914  MPlexLS& outErr,
915  MPlexLV& outPar,
916  const int N_proc) {
917  kalmanOperation(KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
918  }
919 
921  const MPlexLV& psPar,
922  MPlexQI& Chg,
923  const MPlexHS& msErr,
924  const MPlexHV& msPar,
925  MPlexLS& outErr,
926  MPlexLV& outPar,
927  MPlexQI& outFailFlag,
928  const int N_proc,
929  const PropagationFlags& propFlags,
930  const bool propToHit) {
931  if (propToHit) {
932  MPlexLS propErr;
933  MPlexLV propPar;
934  MPlexQF msRad;
935 #pragma omp simd
936  for (int n = 0; n < NN; ++n) {
937  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
938  }
939 
940  propagateHelixToRMPlex(psErr, psPar, Chg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
941 
943  KFO_Update_Params | KFO_Local_Cov, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
944  } else {
946  KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
947  }
948  for (int n = 0; n < NN; ++n) {
949  if (n < N_proc && outPar.At(n, 3, 0) < 0) {
950  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
951  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
952  }
953  }
954  }
955 
956  //------------------------------------------------------------------------------
957 
958  void kalmanComputeChi2(const MPlexLS& psErr,
959  const MPlexLV& psPar,
960  const MPlexQI& inChg,
961  const MPlexHS& msErr,
962  const MPlexHV& msPar,
963  MPlexQF& outChi2,
964  const int N_proc) {
965  kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
966  }
967 
969  const MPlexLV& psPar,
970  const MPlexQI& inChg,
971  const MPlexHS& msErr,
972  const MPlexHV& msPar,
973  MPlexQF& outChi2,
974  MPlexLV& propPar,
975  MPlexQI& outFailFlag,
976  const int N_proc,
977  const PropagationFlags& propFlags,
978  const bool propToHit) {
979  propPar = psPar;
980  if (propToHit) {
981  MPlexLS propErr;
982  MPlexQF msRad;
983 #pragma omp simd
984  for (int n = 0; n < NN; ++n) {
985  if (n < N_proc) {
986  msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
987  } else {
988  msRad.At(n, 0, 0) = 0.0f;
989  }
990  }
991 
992  propagateHelixToRMPlex(psErr, psPar, inChg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
993 
994  kalmanOperation(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
995  } else {
996  kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
997  }
998  }
999 
1000  //------------------------------------------------------------------------------
1001 
1002  void kalmanOperation(const int kfOp,
1003  const MPlexLS& psErr,
1004  const MPlexLV& psPar,
1005  const MPlexHS& msErr,
1006  const MPlexHV& msPar,
1007  MPlexLS& outErr,
1008  MPlexLV& outPar,
1009  MPlexQF& outChi2,
1010  const int N_proc) {
1011 #ifdef DEBUG
1012  {
1013  dmutex_guard;
1014  printf("psPar:\n");
1015  for (int i = 0; i < 6; ++i) {
1016  printf("%8f ", psPar.constAt(0, 0, i));
1017  printf("\n");
1018  }
1019  printf("\n");
1020  printf("psErr:\n");
1021  for (int i = 0; i < 6; ++i) {
1022  for (int j = 0; j < 6; ++j)
1023  printf("%8f ", psErr.constAt(0, i, j));
1024  printf("\n");
1025  }
1026  printf("\n");
1027  printf("msPar:\n");
1028  for (int i = 0; i < 3; ++i) {
1029  printf("%8f ", msPar.constAt(0, 0, i));
1030  printf("\n");
1031  }
1032  printf("\n");
1033  printf("msErr:\n");
1034  for (int i = 0; i < 3; ++i) {
1035  for (int j = 0; j < 3; ++j)
1036  printf("%8f ", msErr.constAt(0, i, j));
1037  printf("\n");
1038  }
1039  printf("\n");
1040  }
1041 #endif
1042 
1043  // Rotate global point on tangent plane to cylinder
1044  // Tangent point is half way between hit and propagate position
1045 
1046  // Rotation matrix
1047  // rotT00 0 rotT01
1048  // rotT01 0 -rotT00
1049  // 0 1 0
1050  // Minimize temporaries: only two float are needed!
1051 
1052  MPlexQF rotT00;
1053  MPlexQF rotT01;
1054  for (int n = 0; n < NN; ++n) {
1055  if (n < N_proc) {
1056  const float r = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
1057  rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
1058  rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
1059  } else {
1060  rotT00.At(n, 0, 0) = 0.0f;
1061  rotT01.At(n, 0, 0) = 0.0f;
1062  }
1063  }
1064 
1065  MPlexHV res_glo; //position residual in global coordinates
1066  SubtractFirst3(msPar, psPar, res_glo);
1067 
1068  MPlexHS resErr_glo; //covariance sum in global position coordinates
1069  AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);
1070 
1071  MPlex2V res_loc; //position residual in local coordinates
1072  RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
1073  MPlex2S resErr_loc; //covariance sum in local position coordinates
1074  MPlexHH tempHH;
1075  ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
1076  ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);
1077 
1078 #ifdef DEBUG
1079  {
1080  dmutex_guard;
1081  printf("res_glo:\n");
1082  for (int i = 0; i < 3; ++i) {
1083  printf("%8f ", res_glo.At(0, i, 0));
1084  }
1085  printf("\n");
1086  printf("resErr_glo:\n");
1087  for (int i = 0; i < 3; ++i) {
1088  for (int j = 0; j < 3; ++j)
1089  printf("%8f ", resErr_glo.At(0, i, j));
1090  printf("\n");
1091  }
1092  printf("\n");
1093  printf("res_loc:\n");
1094  for (int i = 0; i < 2; ++i) {
1095  printf("%8f ", res_loc.At(0, i, 0));
1096  }
1097  printf("\n");
1098  printf("tempHH:\n");
1099  for (int i = 0; i < 3; ++i) {
1100  for (int j = 0; j < 3; ++j)
1101  printf("%8f ", tempHH.At(0, i, j));
1102  printf("\n");
1103  }
1104  printf("\n");
1105  printf("resErr_loc:\n");
1106  for (int i = 0; i < 2; ++i) {
1107  for (int j = 0; j < 2; ++j)
1108  printf("%8f ", resErr_loc.At(0, i, j));
1109  printf("\n");
1110  }
1111  printf("\n");
1112  }
1113 #endif
1114 
1115  //invert the 2x2 matrix
1116  Matriplex::invertCramerSym(resErr_loc);
1117 
1118  if (kfOp & KFO_Calculate_Chi2) {
1119  Chi2Similarity(res_loc, resErr_loc, outChi2);
1120 
1121 #ifdef DEBUG
1122  {
1123  dmutex_guard;
1124  printf("resErr_loc (Inv):\n");
1125  for (int i = 0; i < 2; ++i) {
1126  for (int j = 0; j < 2; ++j)
1127  printf("%8f ", resErr_loc.At(0, i, j));
1128  printf("\n");
1129  }
1130  printf("\n");
1131  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1132  }
1133 #endif
1134  }
1135 
1136  if (kfOp & KFO_Update_Params) {
1137  MPlexLS psErrLoc = psErr;
1138  if (kfOp & KFO_Local_Cov)
1139  CovXYconstrain(rotT00, rotT01, psErr, psErrLoc);
1140 
1141  MPlexLH K; // kalman gain, fixme should be L2
1142  KalmanHTG(rotT00, rotT01, resErr_loc, tempHH); // intermediate term to get kalman gain (H^T*G)
1143  KalmanGain(psErrLoc, tempHH, K);
1144 
1145  MultResidualsAdd(K, psPar, res_loc, outPar);
1146 
1147  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
1148 
1149  MPlexLL tempLL;
1150  KHMult(K, rotT00, rotT01, tempLL);
1151  KHC(tempLL, psErrLoc, outErr);
1152  outErr.subtract(psErrLoc, outErr);
1153 
1154 #ifdef DEBUG
1155  {
1156  dmutex_guard;
1157  if (kfOp & KFO_Local_Cov) {
1158  printf("psErrLoc:\n");
1159  for (int i = 0; i < 6; ++i) {
1160  for (int j = 0; j < 6; ++j)
1161  printf("% 8e ", psErrLoc.At(0, i, j));
1162  printf("\n");
1163  }
1164  printf("\n");
1165  }
1166  printf("resErr_loc (Inv):\n");
1167  for (int i = 0; i < 2; ++i) {
1168  for (int j = 0; j < 2; ++j)
1169  printf("%8f ", resErr_loc.At(0, i, j));
1170  printf("\n");
1171  }
1172  printf("\n");
1173  printf("tempHH:\n");
1174  for (int i = 0; i < 3; ++i) {
1175  for (int j = 0; j < 3; ++j)
1176  printf("%8f ", tempHH.At(0, i, j));
1177  printf("\n");
1178  }
1179  printf("\n");
1180  printf("K:\n");
1181  for (int i = 0; i < 6; ++i) {
1182  for (int j = 0; j < 3; ++j)
1183  printf("%8f ", K.At(0, i, j));
1184  printf("\n");
1185  }
1186  printf("\n");
1187  printf("tempLL:\n");
1188  for (int i = 0; i < 6; ++i) {
1189  for (int j = 0; j < 6; ++j)
1190  printf("%8f ", tempLL.At(0, i, j));
1191  printf("\n");
1192  }
1193  printf("\n");
1194  printf("outPar:\n");
1195  for (int i = 0; i < 6; ++i) {
1196  printf("%8f ", outPar.At(0, i, 0));
1197  }
1198  printf("\n");
1199  printf("outErr:\n");
1200  for (int i = 0; i < 6; ++i) {
1201  for (int j = 0; j < 6; ++j)
1202  printf("%8f ", outErr.At(0, i, j));
1203  printf("\n");
1204  }
1205  printf("\n");
1206  }
1207 #endif
1208  }
1209  }
1210 
1211  //==============================================================================
1212  // Kalman operations - Plane
1213  //==============================================================================
1214 
1215  void kalmanUpdatePlane(const MPlexLS& psErr,
1216  const MPlexLV& psPar,
1217  const MPlexQI& Chg,
1218  const MPlexHS& msErr,
1219  const MPlexHV& msPar,
1220  const MPlexHV& plNrm,
1221  const MPlexHV& plDir,
1222  const MPlexHV& plPnt,
1223  MPlexLS& outErr,
1224  MPlexLV& outPar,
1225  const int N_proc) {
1227  psErr,
1228  psPar,
1229  Chg,
1230  msErr,
1231  msPar,
1232  plNrm,
1233  plDir,
1234  plPnt,
1235  outErr,
1236  outPar,
1237  dummy_chi2,
1238  N_proc);
1239  }
1240 
1242  const MPlexLV& psPar,
1243  MPlexQI& Chg,
1244  const MPlexHS& msErr,
1245  const MPlexHV& msPar,
1246  const MPlexHV& plNrm,
1247  const MPlexHV& plDir,
1248  const MPlexHV& plPnt,
1249  MPlexLS& outErr,
1250  MPlexLV& outPar,
1251  MPlexQI& outFailFlag,
1252  const int N_proc,
1253  const PropagationFlags& propFlags,
1254  const bool propToHit) {
1255  if (propToHit) {
1256  MPlexLS propErr;
1257  MPlexLV propPar;
1258  propagateHelixToPlaneMPlex(psErr, psPar, Chg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1259 
1261  propErr,
1262  propPar,
1263  Chg,
1264  msErr,
1265  msPar,
1266  plNrm,
1267  plDir,
1268  plPnt,
1269  outErr,
1270  outPar,
1271  dummy_chi2,
1272  N_proc);
1273  } else {
1275  psErr,
1276  psPar,
1277  Chg,
1278  msErr,
1279  msPar,
1280  plNrm,
1281  plDir,
1282  plPnt,
1283  outErr,
1284  outPar,
1285  dummy_chi2,
1286  N_proc);
1287  }
1288  for (int n = 0; n < NN; ++n) {
1289  if (outPar.At(n, 3, 0) < 0) {
1290  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1291  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1292  }
1293  }
1294  }
1295 
1296  //------------------------------------------------------------------------------
1297 
1298  void kalmanComputeChi2Plane(const MPlexLS& psErr,
1299  const MPlexLV& psPar,
1300  const MPlexQI& inChg,
1301  const MPlexHS& msErr,
1302  const MPlexHV& msPar,
1303  const MPlexHV& plNrm,
1304  const MPlexHV& plDir,
1305  const MPlexHV& plPnt,
1306  MPlexQF& outChi2,
1307  const int N_proc) {
1309  psErr,
1310  psPar,
1311  inChg,
1312  msErr,
1313  msPar,
1314  plNrm,
1315  plDir,
1316  plPnt,
1317  dummy_err,
1318  dummy_par,
1319  outChi2,
1320  N_proc);
1321  }
1322 
1324  const MPlexLV& psPar,
1325  const MPlexQI& inChg,
1326  const MPlexHS& msErr,
1327  const MPlexHV& msPar,
1328  const MPlexHV& plNrm,
1329  const MPlexHV& plDir,
1330  const MPlexHV& plPnt,
1331  MPlexQF& outChi2,
1332  MPlexLV& propPar,
1333  MPlexQI& outFailFlag,
1334  const int N_proc,
1335  const PropagationFlags& propFlags,
1336  const bool propToHit) {
1337  propPar = psPar;
1338  if (propToHit) {
1339  MPlexLS propErr;
1340  propagateHelixToPlaneMPlex(psErr, psPar, inChg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1341 
1343  propErr,
1344  propPar,
1345  inChg,
1346  msErr,
1347  msPar,
1348  plNrm,
1349  plDir,
1350  plPnt,
1351  dummy_err,
1352  dummy_par,
1353  outChi2,
1354  N_proc);
1355  } else {
1357  psErr,
1358  psPar,
1359  inChg,
1360  msErr,
1361  msPar,
1362  plNrm,
1363  plDir,
1364  plPnt,
1365  dummy_err,
1366  dummy_par,
1367  outChi2,
1368  N_proc);
1369  }
1370  }
1371 
1372  //------------------------------------------------------------------------------
1373 
1374  void kalmanOperationPlaneLocal(const int kfOp,
1375  const MPlexLS& psErr,
1376  const MPlexLV& psPar,
1377  const MPlexQI& inChg,
1378  const MPlexHS& msErr,
1379  const MPlexHV& msPar,
1380  const MPlexHV& plNrm,
1381  const MPlexHV& plDir,
1382  const MPlexHV& plPnt,
1383  MPlexLS& outErr,
1384  MPlexLV& outPar,
1385  MPlexQF& outChi2,
1386  const int N_proc) {
1387 #ifdef DEBUG
1388  {
1389  dmutex_guard;
1390  printf("psPar:\n");
1391  for (int i = 0; i < 6; ++i) {
1392  printf("%8f ", psPar.constAt(0, 0, i));
1393  printf("\n");
1394  }
1395  printf("\n");
1396  printf("psErr:\n");
1397  for (int i = 0; i < 6; ++i) {
1398  for (int j = 0; j < 6; ++j)
1399  printf("%8f ", psErr.constAt(0, i, j));
1400  printf("\n");
1401  }
1402  printf("\n");
1403  printf("msPar:\n");
1404  for (int i = 0; i < 3; ++i) {
1405  printf("%8f ", msPar.constAt(0, 0, i));
1406  printf("\n");
1407  }
1408  printf("\n");
1409  printf("msErr:\n");
1410  for (int i = 0; i < 3; ++i) {
1411  for (int j = 0; j < 3; ++j)
1412  printf("%8f ", msErr.constAt(0, i, j));
1413  printf("\n");
1414  }
1415  printf("\n");
1416  }
1417 #endif
1418 
1419  MPlexHH rot;
1420 #pragma omp simd
1421  for (int n = 0; n < NN; ++n) {
1422  rot(n, 0, 0) = plDir(n, 0, 0);
1423  rot(n, 0, 1) = plDir(n, 1, 0);
1424  rot(n, 0, 2) = plDir(n, 2, 0);
1425  rot(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
1426  rot(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
1427  rot(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
1428  rot(n, 2, 0) = plNrm(n, 0, 0);
1429  rot(n, 2, 1) = plNrm(n, 1, 0);
1430  rot(n, 2, 2) = plNrm(n, 2, 0);
1431  }
1432 
1433  // get local parameters
1434  MPlexHV xd;
1435 #pragma omp simd
1436  for (int n = 0; n < NN; ++n) {
1437  xd(n, 0, 0) = psPar(n, 0, 0) - plPnt(n, 0, 0);
1438  xd(n, 0, 1) = psPar(n, 0, 1) - plPnt(n, 0, 1);
1439  xd(n, 0, 2) = psPar(n, 0, 2) - plPnt(n, 0, 2);
1440  }
1441  MPlex2V xlo;
1442  RotateResidualsOnPlane(rot, xd, xlo);
1443 
1444  MPlexQF sinP, sinT, cosP, cosT, pt; //fixme VDT or something?
1445 #pragma omp simd
1446  for (int n = 0; n < NN; ++n) {
1447  pt(n, 0, 0) = 1.f / psPar(n, 3, 0);
1448  sinP(n, 0, 0) = std::sin(psPar(n, 4, 0));
1449  cosP(n, 0, 0) = std::cos(psPar(n, 4, 0));
1450  sinT(n, 0, 0) = std::sin(psPar(n, 5, 0));
1451  cosT(n, 0, 0) = std::cos(psPar(n, 5, 0));
1452  }
1453 
1454  MPlexHV pgl;
1455 #pragma omp simd
1456  for (int n = 0; n < NN; ++n) {
1457  pgl(n, 0, 0) = cosP(n, 0, 0) * pt(n, 0, 0);
1458  pgl(n, 0, 1) = sinP(n, 0, 0) * pt(n, 0, 0);
1459  pgl(n, 0, 2) = cosT(n, 0, 0) * pt(n, 0, 0) / sinT(n, 0, 0);
1460  }
1461 
1462  MPlexHV plo;
1463  RotateVectorOnPlane(rot, pgl, plo);
1464  MPlex5V lp;
1465 #pragma omp simd
1466  for (int n = 0; n < NN; ++n) {
1467  lp(n, 0, 0) = inChg(n, 0, 0) * psPar(n, 3, 0) * sinT(n, 0, 0);
1468  lp(n, 0, 1) = plo(n, 0, 0) / plo(n, 0, 2);
1469  lp(n, 0, 2) = plo(n, 0, 1) / plo(n, 0, 2);
1470  lp(n, 0, 3) = xlo(n, 0, 0);
1471  lp(n, 0, 4) = xlo(n, 0, 1);
1472  }
1473  MPlexQI pzSign;
1474 #pragma omp simd
1475  for (int n = 0; n < NN; ++n) {
1476  pzSign(n, 0, 0) = plo(n, 0, 2) > 0.f ? 1 : -1;
1477  }
1478 
1479  /*
1480  printf("rot:\n");
1481  for (int i = 0; i < 3; ++i) {
1482  for (int j = 0; j < 3; ++j)
1483  printf("%8f ", rot.At(0, i, j));
1484  printf("\n");
1485  }
1486  printf("\n");
1487  printf("plPnt:\n");
1488  for (int i = 0; i < 3; ++i) {
1489  printf("%8f ", plPnt.constAt(0, 0, i));
1490  }
1491  printf("\n");
1492  printf("xlo:\n");
1493  for (int i = 0; i < 2; ++i) {
1494  printf("%8f ", xlo.At(0, i, 0));
1495  }
1496  printf("\n");
1497  printf("pgl:\n");
1498  for (int i = 0; i < 3; ++i) {
1499  printf("%8f ", pgl.At(0, i, 0));
1500  }
1501  printf("\n");
1502  printf("plo:\n");
1503  for (int i = 0; i < 3; ++i) {
1504  printf("%8f ", plo.At(0, i, 0));
1505  }
1506  printf("\n");
1507  printf("lp:\n");
1508  for (int i = 0; i < 5; ++i) {
1509  printf("%8f ", lp.At(0, i, 0));
1510  }
1511  printf("\n");
1512  */
1513 
1514  //now we need the jacobian to convert from CCS to curvilinear
1515  // code from TrackState::jacobianCCSToCurvilinear
1516  MPlex56 jacCCS2Curv(0.f);
1517 #pragma omp simd
1518  for (int n = 0; n < NN; ++n) {
1519  jacCCS2Curv(n, 0, 3) = inChg(n, 0, 0) * sinT(n, 0, 0);
1520  jacCCS2Curv(n, 0, 5) = inChg(n, 0, 0) * cosT(n, 0, 0) * psPar(n, 3, 0);
1521  jacCCS2Curv(n, 1, 5) = -1.f;
1522  jacCCS2Curv(n, 2, 4) = 1.f;
1523  jacCCS2Curv(n, 3, 0) = -sinP(n, 0, 0);
1524  jacCCS2Curv(n, 3, 1) = cosP(n, 0, 0);
1525  jacCCS2Curv(n, 4, 0) = -cosP(n, 0, 0) * cosT(n, 0, 0);
1526  jacCCS2Curv(n, 4, 1) = -sinP(n, 0, 0) * cosT(n, 0, 0);
1527  jacCCS2Curv(n, 4, 2) = sinT(n, 0, 0);
1528  }
1529 
1530  //now we need the jacobian from curv to local
1531  // code from TrackingTools/AnalyticalJacobians/src/JacobianCurvilinearToLocal.cc
1532  MPlexHV un;
1533  MPlexHV vn;
1534 #pragma omp simd
1535  for (int n = 0; n < NN; ++n) {
1536  const float abslp00 = std::abs(lp(n, 0, 0));
1537  vn(n, 0, 2) = std::max(1.e-30f, abslp00 * pt(n, 0, 0));
1538  un(n, 0, 0) = -pgl(n, 0, 1) * abslp00 / vn(n, 0, 2);
1539  un(n, 0, 1) = pgl(n, 0, 0) * abslp00 / vn(n, 0, 2);
1540  un(n, 0, 2) = 0.f;
1541  vn(n, 0, 0) = -pgl(n, 0, 2) * abslp00 * un(n, 0, 1);
1542  vn(n, 0, 1) = pgl(n, 0, 2) * abslp00 * un(n, 0, 0);
1543  }
1544  MPlexHV u;
1545  RotateVectorOnPlane(rot, un, u);
1546  MPlexHV v;
1547  RotateVectorOnPlane(rot, vn, v);
1548  MPlex55 jacCurv2Loc(0.f);
1549 #pragma omp simd
1550  for (int n = 0; n < NN; ++n) {
1551  // fixme? //(pf.use_param_b_field ? 0.01f * Const::sol * Config::bFieldFromZR(psPar(n, 2, 0), hipo(psPar(n, 0, 0), psPar(n, 1, 0))) : 0.01f * Const::sol * Config::Bfield);
1552  const float bF = 0.01f * Const::sol * Config::Bfield;
1553  const float qh2 = bF * lp(n, 0, 0);
1554  const float t1r = std::sqrt(1.f + lp(n, 0, 1) * lp(n, 0, 1) + lp(n, 0, 2) * lp(n, 0, 2)) * pzSign(n, 0, 0);
1555  const float t2r = t1r * t1r;
1556  const float t3r = t1r * t2r;
1557  jacCurv2Loc(n, 0, 0) = 1.f;
1558  jacCurv2Loc(n, 1, 1) = -u(n, 0, 1) * t2r;
1559  jacCurv2Loc(n, 1, 2) = v(n, 0, 1) * vn(n, 0, 2) * t2r;
1560  jacCurv2Loc(n, 2, 1) = u(n, 0, 0) * t2r;
1561  jacCurv2Loc(n, 2, 2) = -v(n, 0, 0) * vn(n, 0, 2) * t2r;
1562  jacCurv2Loc(n, 3, 3) = v(n, 0, 1) * t1r;
1563  jacCurv2Loc(n, 3, 4) = -u(n, 0, 1) * t1r;
1564  jacCurv2Loc(n, 4, 3) = -v(n, 0, 0) * t1r;
1565  jacCurv2Loc(n, 4, 4) = u(n, 0, 0) * t1r;
1566  const float cosz = -vn(n, 0, 2) * qh2;
1567  const float ui = u(n, 0, 2) * t3r;
1568  const float vi = v(n, 0, 2) * t3r;
1569  jacCurv2Loc(n, 1, 3) = -ui * v(n, 0, 1) * cosz;
1570  jacCurv2Loc(n, 1, 4) = -vi * v(n, 0, 1) * cosz;
1571  jacCurv2Loc(n, 2, 3) = ui * v(n, 0, 0) * cosz;
1572  jacCurv2Loc(n, 2, 4) = vi * v(n, 0, 0) * cosz;
1573  //
1574  }
1575 
1576  // jacobian for converting from CCS to Loc (via Curv)
1577  MPlex56 jacCCS2Loc;
1578  JacCCS2Loc(jacCurv2Loc, jacCCS2Curv, jacCCS2Loc);
1579 
1580  // local error!
1581  MPlex5S psErrLoc;
1582  MPlex56 temp56;
1583  PsErrLoc(jacCCS2Loc, psErr, temp56);
1584  PsErrLocTransp(temp56, jacCCS2Loc, psErrLoc);
1585 
1586  MPlexHV md;
1587 #pragma omp simd
1588  for (int n = 0; n < NN; ++n) {
1589  md(n, 0, 0) = msPar(n, 0, 0) - plPnt(n, 0, 0);
1590  md(n, 0, 1) = msPar(n, 0, 1) - plPnt(n, 0, 1);
1591  md(n, 0, 2) = msPar(n, 0, 2) - plPnt(n, 0, 2);
1592  }
1593  MPlex2V mslo;
1594  RotateResidualsOnPlane(rot, md, mslo);
1595 
1596  MPlex2V res_loc; //position residual in local coordinates
1597 #pragma omp simd
1598  for (int n = 0; n < NN; ++n) {
1599  res_loc(n, 0, 0) = mslo(n, 0, 0) - xlo(n, 0, 0);
1600  res_loc(n, 0, 1) = mslo(n, 0, 1) - xlo(n, 0, 1);
1601  }
1602 
1603  MPlex2S msErr_loc;
1604  MPlex2H temp2Hmsl;
1605  ProjectResErr(rot, msErr, temp2Hmsl);
1606  ProjectResErrTransp(rot, temp2Hmsl, msErr_loc);
1607 
1608  MPlex2S resErr_loc; //covariance sum in local position coordinates
1609 #pragma omp simd
1610  for (int n = 0; n < NN; ++n) {
1611  resErr_loc(n, 0, 0) = psErrLoc(n, 3, 3) + msErr_loc(n, 0, 0);
1612  resErr_loc(n, 0, 1) = psErrLoc(n, 3, 4) + msErr_loc(n, 0, 1);
1613  resErr_loc(n, 1, 1) = psErrLoc(n, 4, 4) + msErr_loc(n, 1, 1);
1614  }
1615  /*
1616  printf("jacCCS2Curv:\n");
1617  for (int i = 0; i < 5; ++i) {
1618  for (int j = 0; j < 6; ++j)
1619  printf("%8f ", jacCCS2Curv.At(0, i, j));
1620  printf("\n");
1621  }
1622  printf("un:\n");
1623  for (int i = 0; i < 3; ++i) {
1624  printf("%8f ", un.At(0, i, 0));
1625  }
1626  printf("\n");
1627  printf("u:\n");
1628  for (int i = 0; i < 3; ++i) {
1629  printf("%8f ", u.At(0, i, 0));
1630  }
1631  printf("\n");
1632  printf("\n");
1633  printf("jacCurv2Loc:\n");
1634  for (int i = 0; i < 5; ++i) {
1635  for (int j = 0; j < 5; ++j)
1636  printf("%8f ", jacCurv2Loc.At(0, i, j));
1637  printf("\n");
1638  }
1639  printf("\n");
1640  printf("jacCCS2Loc:\n");
1641  for (int i = 0; i < 5; ++i) {
1642  for (int j = 0; j < 6; ++j)
1643  printf("%8f ", jacCCS2Loc.At(0, i, j));
1644  printf("\n");
1645  }
1646  printf("\n");
1647  printf("temp56:\n");
1648  for (int i = 0; i < 5; ++i) {
1649  for (int j = 0; j < 6; ++j)
1650  printf("%8f ", temp56.At(0, i, j));
1651  printf("\n");
1652  }
1653  printf("\n");
1654  printf("psErrLoc:\n");
1655  for (int i = 0; i < 5; ++i) {
1656  for (int j = 0; j < 5; ++j)
1657  printf("%8f ", psErrLoc.At(0, i, j));
1658  printf("\n");
1659  }
1660  printf("\n");
1661  printf("res_loc:\n");
1662  for (int i = 0; i < 2; ++i) {
1663  printf("%8f ", res_loc.At(0, i, 0));
1664  }
1665  printf("\n");
1666  printf("resErr_loc:\n");
1667  for (int i = 0; i < 2; ++i) {
1668  for (int j = 0; j < 2; ++j)
1669  printf("%8f ", resErr_loc.At(0, i, j));
1670  printf("\n");
1671  }
1672  printf("\n");
1673  */
1674  //invert the 2x2 matrix
1675  Matriplex::invertCramerSym(resErr_loc);
1676 
1677  if (kfOp & KFO_Calculate_Chi2) {
1678  Chi2Similarity(res_loc, resErr_loc, outChi2);
1679 
1680 #ifdef DEBUG
1681  {
1682  dmutex_guard;
1683  printf("resErr_loc (Inv):\n");
1684  for (int i = 0; i < 2; ++i) {
1685  for (int j = 0; j < 2; ++j)
1686  printf("%8f ", resErr_loc.At(0, i, j));
1687  printf("\n");
1688  }
1689  printf("\n");
1690  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1691  }
1692 #endif
1693  }
1694 
1695  if (kfOp & KFO_Update_Params) {
1696  MPlex52 K; // kalman gain
1697 #pragma omp simd
1698  for (int n = 0; n < NN; ++n) {
1699 #pragma GCC unroll 5
1700  for (int j = 0; j < 5; ++j) {
1701  K(n, j, 0) = resErr_loc(n, 0, 0) * psErrLoc(n, j, 3) + resErr_loc(n, 0, 1) * psErrLoc(n, j, 4);
1702  K(n, j, 1) = resErr_loc(n, 0, 1) * psErrLoc(n, j, 3) + resErr_loc(n, 1, 1) * psErrLoc(n, j, 4);
1703  }
1704  }
1705 
1706  MPlex5V lp_upd;
1707  MultResidualsAdd(K, lp, res_loc, lp_upd);
1708 
1709  MPlex55 ImKH(0.f);
1710 #pragma omp simd
1711  for (int n = 0; n < NN; ++n) {
1712 #pragma GCC unroll 5
1713  for (int j = 0; j < 5; ++j) {
1714  ImKH(n, j, j) = 1.f;
1715  ImKH(n, j, 3) -= K(n, j, 0);
1716  ImKH(n, j, 4) -= K(n, j, 1);
1717  }
1718  }
1719  MPlex5S psErrLoc_upd;
1720  PsErrLocUpd(ImKH, psErrLoc, psErrLoc_upd);
1721 
1722  //convert local updated parameters into CCS
1723  MPlexHV lxu;
1724  MPlexHV lpu;
1725 #pragma omp simd
1726  for (int n = 0; n < NN; ++n) {
1727  lxu(n, 0, 0) = lp_upd(n, 0, 3);
1728  lxu(n, 0, 1) = lp_upd(n, 0, 4);
1729  lxu(n, 0, 2) = 0.f;
1730  lpu(n, 0, 2) =
1731  pzSign(n, 0, 0) / (std::max(std::abs(lp_upd(n, 0, 0)), 1.e-9f) *
1732  std::sqrt(1.f + lp_upd(n, 0, 1) * lp_upd(n, 0, 1) + lp_upd(n, 0, 2) * lp_upd(n, 0, 2)));
1733  lpu(n, 0, 0) = lpu(n, 0, 2) * lp_upd(n, 0, 1);
1734  lpu(n, 0, 1) = lpu(n, 0, 2) * lp_upd(n, 0, 2);
1735  }
1736  MPlexHV gxu;
1737  RotateVectorOnPlaneTransp(rot, lxu, gxu);
1738 #pragma omp simd
1739  for (int n = 0; n < NN; ++n) {
1740  gxu(n, 0, 0) += plPnt(n, 0, 0);
1741  gxu(n, 0, 1) += plPnt(n, 0, 1);
1742  gxu(n, 0, 2) += plPnt(n, 0, 2);
1743  }
1744  MPlexHV gpu;
1745  RotateVectorOnPlaneTransp(rot, lpu, gpu);
1746 
1747  MPlexQF p;
1748 #pragma omp simd
1749  for (int n = 0; n < NN; ++n) {
1750  pt(n, 0, 0) = std::sqrt(gpu.At(n, 0, 0) * gpu.At(n, 0, 0) + gpu.At(n, 0, 1) * gpu.At(n, 0, 1));
1751  p(n, 0, 0) = std::sqrt(pt.At(n, 0, 0) * pt.At(n, 0, 0) + gpu.At(n, 0, 2) * gpu.At(n, 0, 2));
1752  sinP(n, 0, 0) = gpu.At(n, 0, 1) / pt(n, 0, 0);
1753  cosP(n, 0, 0) = gpu.At(n, 0, 0) / pt(n, 0, 0);
1754  sinT(n, 0, 0) = pt(n, 0, 0) / p(n, 0, 0);
1755  cosT(n, 0, 0) = gpu.At(n, 0, 2) / p(n, 0, 0);
1756  }
1757 
1758 #pragma omp simd
1759  for (int n = 0; n < NN; ++n) {
1760  outPar(n, 0, 0) = gxu.At(n, 0, 0);
1761  outPar(n, 0, 1) = gxu.At(n, 0, 1);
1762  outPar(n, 0, 2) = gxu.At(n, 0, 2);
1763  outPar(n, 0, 3) = 1.f / pt(n, 0, 0);
1764  outPar(n, 0, 4) = getPhi(gpu.At(n, 0, 0), gpu.At(n, 0, 1)); //fixme VDT or something?
1765  outPar(n, 0, 5) = getTheta(pt(n, 0, 0), gpu.At(n, 0, 2));
1766  }
1767 
1768  //now we need the jacobian to convert from curvilinear to CCS
1769  // code from TrackState::jacobianCurvilinearToCCS
1770  MPlex65 jacCurv2CCS(0.f);
1771 #pragma omp simd
1772  for (int n = 0; n < NN; ++n) {
1773  jacCurv2CCS(n, 0, 3) = -sinP(n, 0, 0);
1774  jacCurv2CCS(n, 0, 4) = -cosT(n, 0, 0) * cosP(n, 0, 0);
1775  jacCurv2CCS(n, 1, 3) = cosP(n, 0, 0);
1776  jacCurv2CCS(n, 1, 4) = -cosT(n, 0, 0) * sinP(n, 0, 0);
1777  jacCurv2CCS(n, 2, 4) = sinT(n, 0, 0);
1778  jacCurv2CCS(n, 3, 0) = inChg(n, 0, 0) / sinT(n, 0, 0);
1779  jacCurv2CCS(n, 3, 1) = outPar(n, 3, 0) * cosT(n, 0, 0) / sinT(n, 0, 0);
1780  jacCurv2CCS(n, 4, 2) = 1.f;
1781  jacCurv2CCS(n, 5, 1) = -1.f;
1782  }
1783 
1784  //now we need the jacobian from local to curv
1785  // code from TrackingTools/AnalyticalJacobians/src/JacobianLocalToCurvilinear.cc
1786  MPlexHV tnl;
1787 #pragma omp simd
1788  for (int n = 0; n < NN; ++n) {
1789  const float abslpupd00 = std::max(std::abs(lp_upd(n, 0, 0)), 1.e-9f);
1790  tnl(n, 0, 0) = lpu(n, 0, 0) * abslpupd00;
1791  tnl(n, 0, 1) = lpu(n, 0, 1) * abslpupd00;
1792  tnl(n, 0, 2) = lpu(n, 0, 2) * abslpupd00;
1793  }
1794  MPlexHV tn;
1795  RotateVectorOnPlaneTransp(rot, tnl, tn);
1796 #pragma omp simd
1797  for (int n = 0; n < NN; ++n) {
1798  vn(n, 0, 2) = std::max(1.e-30f, std::sqrt(tn(n, 0, 0) * tn(n, 0, 0) + tn(n, 0, 1) * tn(n, 0, 1)));
1799  un(n, 0, 0) = -tn(n, 0, 1) / vn(n, 0, 2);
1800  un(n, 0, 1) = tn(n, 0, 0) / vn(n, 0, 2);
1801  un(n, 0, 2) = 0.f;
1802  vn(n, 0, 0) = -tn(n, 0, 2) * un(n, 0, 1);
1803  vn(n, 0, 1) = tn(n, 0, 2) * un(n, 0, 0);
1804  }
1805  MPlex55 jacLoc2Curv(0.f);
1806 #pragma omp simd
1807  for (int n = 0; n < NN; ++n) {
1808  // fixme? //(pf.use_param_b_field ? 0.01f * Const::sol * Config::bFieldFromZR(psPar(n, 2, 0), hipo(psPar(n, 0, 0), psPar(n, 1, 0))) : 0.01f * Const::sol * Config::Bfield);
1809  const float bF = 0.01f * Const::sol * Config::Bfield; //fixme: cache?
1810  const float qh2 = bF * lp_upd(n, 0, 0);
1811  const float cosl1 = 1.f / vn(n, 0, 2);
1812  const float uj = un(n, 0, 0) * rot(n, 0, 0) + un(n, 0, 1) * rot(n, 0, 1);
1813  const float uk = un(n, 0, 0) * rot(n, 1, 0) + un(n, 0, 1) * rot(n, 1, 1);
1814  const float vj = vn(n, 0, 0) * rot(n, 0, 0) + vn(n, 0, 1) * rot(n, 0, 1) + vn(n, 0, 2) * rot(n, 0, 2);
1815  const float vk = vn(n, 0, 0) * rot(n, 1, 0) + vn(n, 0, 1) * rot(n, 1, 1) + vn(n, 0, 2) * rot(n, 1, 2);
1816  const float cosz = vn(n, 0, 2) * qh2;
1817  jacLoc2Curv(n, 0, 0) = 1.f;
1818  jacLoc2Curv(n, 1, 1) = tnl(n, 0, 2) * vj;
1819  jacLoc2Curv(n, 1, 2) = tnl(n, 0, 2) * vk;
1820  jacLoc2Curv(n, 2, 1) = tnl(n, 0, 2) * uj * cosl1;
1821  jacLoc2Curv(n, 2, 2) = tnl(n, 0, 2) * uk * cosl1;
1822  jacLoc2Curv(n, 3, 3) = uj;
1823  jacLoc2Curv(n, 3, 4) = uk;
1824  jacLoc2Curv(n, 4, 3) = vj;
1825  jacLoc2Curv(n, 4, 4) = vk;
1826  jacLoc2Curv(n, 2, 3) = tnl(n, 0, 0) * (cosz * cosl1);
1827  jacLoc2Curv(n, 2, 4) = tnl(n, 0, 1) * (cosz * cosl1);
1828  }
1829 
1830  // jacobian for converting from Loc to CCS (via Curv)
1831  MPlex65 jacLoc2CCS;
1832  JacLoc2CCS(jacCurv2CCS, jacLoc2Curv, jacLoc2CCS);
1833 
1834  // CCS error!
1835  MPlex65 temp65;
1836  OutErrCCS(jacLoc2CCS, psErrLoc_upd, temp65);
1837  OutErrCCSTransp(temp65, jacLoc2CCS, outErr);
1838 
1839  /*
1840  printf("\n");
1841  printf("lp_upd:\n");
1842  for (int i = 0; i < 5; ++i) {
1843  printf("%8f ", lp_upd.At(0, i, 0));
1844  }
1845  printf("\n");
1846  printf("psErrLoc_upd:\n");
1847  for (int i = 0; i < 5; ++i) {
1848  for (int j = 0; j < 5; ++j)
1849  printf("%8f ", psErrLoc_upd.At(0, i, j));
1850  printf("\n");
1851  }
1852  printf("\n");
1853  printf("lxu:\n");
1854  for (int i = 0; i < 3; ++i) {
1855  printf("%8f ", lxu.At(0, i, 0));
1856  }
1857  printf("\n");
1858  printf("lpu:\n");
1859  for (int i = 0; i < 3; ++i) {
1860  printf("%8f ", lpu.At(0, i, 0));
1861  }
1862  printf("\n");
1863  printf("gxu:\n");
1864  for (int i = 0; i < 3; ++i) {
1865  printf("%8f ", gxu.At(0, i, 0));
1866  }
1867  printf("\n");
1868  printf("gpu:\n");
1869  for (int i = 0; i < 3; ++i) {
1870  printf("%8f ", gpu.At(0, i, 0));
1871  }
1872  printf("\n");
1873  printf("outPar:\n");
1874  for (int i = 0; i < 6; ++i) {
1875  printf("%8f ", outPar.At(0, i, 0));
1876  }
1877  printf("\n");
1878  printf("tnl:\n");
1879  for (int i = 0; i < 3; ++i) {
1880  printf("%8f ", tnl.At(0, i, 0));
1881  }
1882  printf("\n");
1883  printf("tn:\n");
1884  for (int i = 0; i < 3; ++i) {
1885  printf("%8f ", tn.At(0, i, 0));
1886  }
1887  printf("\n");
1888  printf("un:\n");
1889  for (int i = 0; i < 3; ++i) {
1890  printf("%8f ", un.At(0, i, 0));
1891  }
1892  printf("\n");
1893  printf("vn:\n");
1894  for (int i = 0; i < 3; ++i) {
1895  printf("%8f ", vn.At(0, i, 0));
1896  }
1897  printf("\n");
1898  printf("jacLoc2Curv:\n");
1899  for (int i = 0; i < 5; ++i) {
1900  for (int j = 0; j < 5; ++j)
1901  printf("%8f ", jacLoc2Curv.At(0, i, j));
1902  printf("\n");
1903  }
1904  printf("\n");
1905  printf("outErr:\n");
1906  for (int i = 0; i < 6; ++i) {
1907  for (int j = 0; j < 6; ++j)
1908  printf("%8f ", outErr.At(0, i, j));
1909  printf("\n");
1910  }
1911  printf("\n");
1912  */
1913 
1914 #ifdef DEBUG
1915  {
1916  dmutex_guard;
1917  if (kfOp & KFO_Local_Cov) {
1918  printf("psErrLoc_upd:\n");
1919  for (int i = 0; i < 5; ++i) {
1920  for (int j = 0; j < 5; ++j)
1921  printf("% 8e ", psErrLoc_upd.At(0, i, j));
1922  printf("\n");
1923  }
1924  printf("\n");
1925  }
1926  printf("resErr_loc (Inv):\n");
1927  for (int i = 0; i < 2; ++i) {
1928  for (int j = 0; j < 2; ++j)
1929  printf("%8f ", resErr_loc.At(0, i, j));
1930  printf("\n");
1931  }
1932  printf("\n");
1933  printf("K:\n");
1934  for (int i = 0; i < 6; ++i) {
1935  for (int j = 0; j < 2; ++j)
1936  printf("%8f ", K.At(0, i, j));
1937  printf("\n");
1938  }
1939  printf("\n");
1940  printf("outPar:\n");
1941  for (int i = 0; i < 6; ++i) {
1942  printf("%8f ", outPar.At(0, i, 0));
1943  }
1944  printf("\n");
1945  printf("outErr:\n");
1946  for (int i = 0; i < 6; ++i) {
1947  for (int j = 0; j < 6; ++j)
1948  printf("%8f ", outErr.At(0, i, j));
1949  printf("\n");
1950  }
1951  printf("\n");
1952  }
1953 #endif
1954  }
1955 
1956  return;
1957  }
1958 
1959  //------------------------------------------------------------------------------
1960 
1961  void kalmanOperationPlane(const int kfOp,
1962  const MPlexLS& psErr,
1963  const MPlexLV& psPar,
1964  const MPlexQI& inChg,
1965  const MPlexHS& msErr,
1966  const MPlexHV& msPar,
1967  const MPlexHV& plNrm,
1968  const MPlexHV& plDir,
1969  const MPlexHV& plPnt, //not used, can be removed (fixme)
1970  MPlexLS& outErr,
1971  MPlexLV& outPar,
1972  MPlexQF& outChi2,
1973  const int N_proc) {
1974 #ifdef DEBUG
1975  {
1976  dmutex_guard;
1977  printf("psPar:\n");
1978  for (int i = 0; i < 6; ++i) {
1979  printf("%8f ", psPar.constAt(0, 0, i));
1980  printf("\n");
1981  }
1982  printf("\n");
1983  printf("psErr:\n");
1984  for (int i = 0; i < 6; ++i) {
1985  for (int j = 0; j < 6; ++j)
1986  printf("%8f ", psErr.constAt(0, i, j));
1987  printf("\n");
1988  }
1989  printf("\n");
1990  printf("msPar:\n");
1991  for (int i = 0; i < 3; ++i) {
1992  printf("%8f ", msPar.constAt(0, 0, i));
1993  printf("\n");
1994  }
1995  printf("\n");
1996  printf("msErr:\n");
1997  for (int i = 0; i < 3; ++i) {
1998  for (int j = 0; j < 3; ++j)
1999  printf("%8f ", msErr.constAt(0, i, j));
2000  printf("\n");
2001  }
2002  printf("\n");
2003  }
2004 #endif
2005 
2006  // Rotate global point on local plane
2007 
2008  // Rotation matrix
2009  // D0 D1 D2
2010  // X0 X1 X2
2011  // N0 N1 N2
2012  // where D is the strip direction vector plDir, N is the normal plNrm, and X is the cross product between the two
2013 
2014  MPlex2H prj;
2015  for (int n = 0; n < NN; ++n) {
2016  prj(n, 0, 0) = plDir(n, 0, 0);
2017  prj(n, 0, 1) = plDir(n, 1, 0);
2018  prj(n, 0, 2) = plDir(n, 2, 0);
2019  prj(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
2020  prj(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
2021  prj(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
2022  }
2023 
2024  MPlexHV res_glo; //position residual in global coordinates
2025  SubtractFirst3(msPar, psPar, res_glo);
2026 
2027  MPlexHS resErr_glo; //covariance sum in global position coordinates
2028  AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);
2029 
2030  MPlex2V res_loc; //position residual in local coordinates
2031  RotateResidualsOnPlane(prj, res_glo, res_loc);
2032  MPlex2S resErr_loc; //covariance sum in local position coordinates
2033  MPlex2H temp2H;
2034  ProjectResErr(prj, resErr_glo, temp2H);
2035  ProjectResErrTransp(prj, temp2H, resErr_loc);
2036 
2037 #ifdef DEBUG
2038  {
2039  dmutex_guard;
2040  printf("prj:\n");
2041  for (int i = 0; i < 2; ++i) {
2042  for (int j = 0; j < 3; ++j)
2043  printf("%8f ", prj.At(0, i, j));
2044  printf("\n");
2045  }
2046  printf("\n");
2047  printf("res_glo:\n");
2048  for (int i = 0; i < 3; ++i) {
2049  printf("%8f ", res_glo.At(0, i, 0));
2050  }
2051  printf("\n");
2052  printf("resErr_glo:\n");
2053  for (int i = 0; i < 3; ++i) {
2054  for (int j = 0; j < 3; ++j)
2055  printf("%8f ", resErr_glo.At(0, i, j));
2056  printf("\n");
2057  }
2058  printf("\n");
2059  printf("res_loc:\n");
2060  for (int i = 0; i < 2; ++i) {
2061  printf("%8f ", res_loc.At(0, i, 0));
2062  }
2063  printf("\n");
2064  printf("temp2H:\n");
2065  for (int i = 0; i < 2; ++i) {
2066  for (int j = 0; j < 3; ++j)
2067  printf("%8f ", temp2H.At(0, i, j));
2068  printf("\n");
2069  }
2070  printf("\n");
2071  printf("resErr_loc:\n");
2072  for (int i = 0; i < 2; ++i) {
2073  for (int j = 0; j < 2; ++j)
2074  printf("%8f ", resErr_loc.At(0, i, j));
2075  printf("\n");
2076  }
2077  printf("\n");
2078  }
2079 #endif
2080 
2081  //invert the 2x2 matrix
2082  Matriplex::invertCramerSym(resErr_loc);
2083 
2084  if (kfOp & KFO_Calculate_Chi2) {
2085  Chi2Similarity(res_loc, resErr_loc, outChi2);
2086 
2087 #ifdef DEBUG
2088  {
2089  dmutex_guard;
2090  printf("resErr_loc (Inv):\n");
2091  for (int i = 0; i < 2; ++i) {
2092  for (int j = 0; j < 2; ++j)
2093  printf("%8f ", resErr_loc.At(0, i, j));
2094  printf("\n");
2095  }
2096  printf("\n");
2097  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
2098  }
2099 #endif
2100  }
2101 
2102  if (kfOp & KFO_Update_Params) {
2103  MPlexLS psErrLoc = psErr;
2104 
2105  MPlexH2 tempH2;
2106  MPlexL2 K; // kalman gain
2107  KalmanHTG(prj, resErr_loc, tempH2); // intermediate term to get kalman gain (H^T*G)
2108  KalmanGain(psErrLoc, tempH2, K);
2109 
2110  MultResidualsAdd(K, psPar, res_loc, outPar);
2111 
2112  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
2113 
2114  MPlexLL tempLL;
2115  KHMult(K, prj, tempLL);
2116  KHC(tempLL, psErrLoc, outErr);
2117  outErr.subtract(psErrLoc, outErr);
2118 
2119 #ifdef DEBUG
2120  {
2121  dmutex_guard;
2122  if (kfOp & KFO_Local_Cov) {
2123  printf("psErrLoc:\n");
2124  for (int i = 0; i < 6; ++i) {
2125  for (int j = 0; j < 6; ++j)
2126  printf("% 8e ", psErrLoc.At(0, i, j));
2127  printf("\n");
2128  }
2129  printf("\n");
2130  }
2131  printf("resErr_loc (Inv):\n");
2132  for (int i = 0; i < 2; ++i) {
2133  for (int j = 0; j < 2; ++j)
2134  printf("%8f ", resErr_loc.At(0, i, j));
2135  printf("\n");
2136  }
2137  printf("\n");
2138  printf("tempH2:\n");
2139  for (int i = 0; i < 3; ++i) {
2140  for (int j = 0; j < 2; ++j)
2141  printf("%8f ", tempH2.At(0, i, j));
2142  printf("\n");
2143  }
2144  printf("\n");
2145  printf("K:\n");
2146  for (int i = 0; i < 6; ++i) {
2147  for (int j = 0; j < 2; ++j)
2148  printf("%8f ", K.At(0, i, j));
2149  printf("\n");
2150  }
2151  printf("\n");
2152  printf("tempLL:\n");
2153  for (int i = 0; i < 6; ++i) {
2154  for (int j = 0; j < 6; ++j)
2155  printf("%8f ", tempLL.At(0, i, j));
2156  printf("\n");
2157  }
2158  printf("\n");
2159  printf("outPar:\n");
2160  for (int i = 0; i < 6; ++i) {
2161  printf("%8f ", outPar.At(0, i, 0));
2162  }
2163  printf("\n");
2164  printf("outErr:\n");
2165  for (int i = 0; i < 6; ++i) {
2166  for (int j = 0; j < 6; ++j)
2167  printf("%8f ", outErr.At(0, i, j));
2168  printf("\n");
2169  }
2170  printf("\n");
2171  }
2172 #endif
2173  }
2174  }
2175 
2176  //==============================================================================
2177  // Kalman operations - Endcap
2178  //==============================================================================
2179 
2180  void kalmanUpdateEndcap(const MPlexLS& psErr,
2181  const MPlexLV& psPar,
2182  const MPlexHS& msErr,
2183  const MPlexHV& msPar,
2184  MPlexLS& outErr,
2185  MPlexLV& outPar,
2186  const int N_proc) {
2187  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2188  }
2189 
2191  const MPlexLV& psPar,
2192  MPlexQI& Chg,
2193  const MPlexHS& msErr,
2194  const MPlexHV& msPar,
2195  MPlexLS& outErr,
2196  MPlexLV& outPar,
2197  MPlexQI& outFailFlag,
2198  const int N_proc,
2199  const PropagationFlags& propFlags,
2200  const bool propToHit) {
2201  if (propToHit) {
2202  MPlexLS propErr;
2203  MPlexLV propPar;
2204  MPlexQF msZ;
2205 #pragma omp simd
2206  for (int n = 0; n < NN; ++n) {
2207  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
2208  }
2209 
2210  propagateHelixToZMPlex(psErr, psPar, Chg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
2211 
2212  kalmanOperationEndcap(KFO_Update_Params, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2213  } else {
2214  kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2215  }
2216  for (int n = 0; n < NN; ++n) {
2217  if (n < N_proc && outPar.At(n, 3, 0) < 0) {
2218  Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
2219  outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
2220  }
2221  }
2222  }
2223 
2224  //------------------------------------------------------------------------------
2225 
2227  const MPlexLV& psPar,
2228  const MPlexQI& inChg,
2229  const MPlexHS& msErr,
2230  const MPlexHV& msPar,
2231  MPlexQF& outChi2,
2232  const int N_proc) {
2233  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2234  }
2235 
2237  const MPlexLV& psPar,
2238  const MPlexQI& inChg,
2239  const MPlexHS& msErr,
2240  const MPlexHV& msPar,
2241  MPlexQF& outChi2,
2242  MPlexLV& propPar,
2243  MPlexQI& outFailFlag,
2244  const int N_proc,
2245  const PropagationFlags& propFlags,
2246  const bool propToHit) {
2247  propPar = psPar;
2248  if (propToHit) {
2249  MPlexLS propErr;
2250  MPlexQF msZ;
2251 #pragma omp simd
2252  for (int n = 0; n < NN; ++n) {
2253  msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
2254  }
2255 
2256  propagateHelixToZMPlex(psErr, psPar, inChg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
2257 
2258  kalmanOperationEndcap(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2259  } else {
2260  kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2261  }
2262  }
2263 
2264  //------------------------------------------------------------------------------
2265 
2266  void kalmanOperationEndcap(const int kfOp,
2267  const MPlexLS& psErr,
2268  const MPlexLV& psPar,
2269  const MPlexHS& msErr,
2270  const MPlexHV& msPar,
2271  MPlexLS& outErr,
2272  MPlexLV& outPar,
2273  MPlexQF& outChi2,
2274  const int N_proc) {
2275 #ifdef DEBUG
2276  {
2277  dmutex_guard;
2278  printf("updateParametersEndcapMPlex\n");
2279  printf("psPar:\n");
2280  for (int i = 0; i < 6; ++i) {
2281  printf("%8f ", psPar.constAt(0, 0, i));
2282  printf("\n");
2283  }
2284  printf("\n");
2285  printf("msPar:\n");
2286  for (int i = 0; i < 3; ++i) {
2287  printf("%8f ", msPar.constAt(0, 0, i));
2288  printf("\n");
2289  }
2290  printf("\n");
2291  printf("psErr:\n");
2292  for (int i = 0; i < 6; ++i) {
2293  for (int j = 0; j < 6; ++j)
2294  printf("%8f ", psErr.constAt(0, i, j));
2295  printf("\n");
2296  }
2297  printf("\n");
2298  printf("msErr:\n");
2299  for (int i = 0; i < 3; ++i) {
2300  for (int j = 0; j < 3; ++j)
2301  printf("%8f ", msErr.constAt(0, i, j));
2302  printf("\n");
2303  }
2304  printf("\n");
2305  }
2306 #endif
2307 
2308  MPlex2V res;
2309  SubtractFirst2(msPar, psPar, res);
2310 
2311  MPlex2S resErr;
2312  AddIntoUpperLeft2x2(psErr, msErr, resErr);
2313 
2314 #ifdef DEBUG
2315  {
2316  dmutex_guard;
2317  printf("resErr:\n");
2318  for (int i = 0; i < 2; ++i) {
2319  for (int j = 0; j < 2; ++j)
2320  printf("%8f ", resErr.At(0, i, j));
2321  printf("\n");
2322  }
2323  printf("\n");
2324  }
2325 #endif
2326 
2327  //invert the 2x2 matrix
2329 
2330  if (kfOp & KFO_Calculate_Chi2) {
2331  Chi2Similarity(res, resErr, outChi2);
2332 
2333 #ifdef DEBUG
2334  {
2335  dmutex_guard;
2336  printf("resErr_loc (Inv):\n");
2337  for (int i = 0; i < 2; ++i) {
2338  for (int j = 0; j < 2; ++j)
2339  printf("%8f ", resErr.At(0, i, j));
2340  printf("\n");
2341  }
2342  printf("\n");
2343  printf("chi2: %8f\n", outChi2.At(0, 0, 0));
2344  }
2345 #endif
2346  }
2347 
2348  if (kfOp & KFO_Update_Params) {
2349  MPlexL2 K;
2350  KalmanGain(psErr, resErr, K);
2351 
2352  MultResidualsAdd(K, psPar, res, outPar);
2353 
2354  squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi|
2355 
2356  KHC(K, psErr, outErr);
2357 
2358 #ifdef DEBUG
2359  {
2360  dmutex_guard;
2361  printf("outErr before subtract:\n");
2362  for (int i = 0; i < 6; ++i) {
2363  for (int j = 0; j < 6; ++j)
2364  printf("%8f ", outErr.At(0, i, j));
2365  printf("\n");
2366  }
2367  printf("\n");
2368  }
2369 #endif
2370 
2371  outErr.subtract(psErr, outErr);
2372 
2373 #ifdef DEBUG
2374  {
2375  dmutex_guard;
2376  printf("res:\n");
2377  for (int i = 0; i < 2; ++i) {
2378  printf("%8f ", res.At(0, i, 0));
2379  }
2380  printf("\n");
2381  printf("resErr (Inv):\n");
2382  for (int i = 0; i < 2; ++i) {
2383  for (int j = 0; j < 2; ++j)
2384  printf("%8f ", resErr.At(0, i, j));
2385  printf("\n");
2386  }
2387  printf("\n");
2388  printf("K:\n");
2389  for (int i = 0; i < 6; ++i) {
2390  for (int j = 0; j < 2; ++j)
2391  printf("%8f ", K.At(0, i, j));
2392  printf("\n");
2393  }
2394  printf("\n");
2395  printf("outPar:\n");
2396  for (int i = 0; i < 6; ++i) {
2397  printf("%8f ", outPar.At(0, i, 0));
2398  }
2399  printf("\n");
2400  printf("outErr:\n");
2401  for (int i = 0; i < 6; ++i) {
2402  for (int j = 0; j < 6; ++j)
2403  printf("%8f ", outErr.At(0, i, j));
2404  printf("\n");
2405  }
2406  printf("\n");
2407  }
2408 #endif
2409  }
2410  }
2411 
2412 } // end namespace mkfit
Matriplex::Matriplex< float, HH, HH, NN > MPlexHH
Definition: Matrix.h:57
Matriplex::MatriplexSym< float, 5, NN > MPlex5S
Definition: Matrix.h:62
Matriplex::Matriplex< float, LL, LL, NN > MPlexLL
Definition: Matrix.h:53
MPlex< T, D1, D2, N > hypot(const MPlex< T, D1, D2, N > &a, const MPlex< T, D1, D2, N > &b)
Definition: Matriplex.h:616
#define CMS_SA_ALLOW
void propagateHelixToRMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msRad, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr)
Definition: APVGainStruct.h:7
void kalmanOperationEndcap(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, HH, 1, NN > MPlexHV
Definition: Matrix.h:58
void kalmanUpdate(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
Sin< T >::type sin(const T &t)
Definition: Sin.h:22
void kalmanComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, LL, 2, NN > MPlexL2
Definition: Matrix.h:76
float getTheta(float r, float z)
Definition: Hit.h:36
Matriplex::Matriplex< float, LL, 1, NN > MPlexLV
Definition: Matrix.h:54
void kalmanPropagateAndUpdate(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
assert(be >=bs)
__host__ __device__ VT * co
Definition: prefixScan.h:47
Definition: Electron.h:6
Matriplex::Matriplex< float, 5, 5, NN > MPlex55
Definition: Matrix.h:64
void squashPhiMPlex(MPlexLV &par, const int N_proc)
void kalmanOperation(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
void invertCramerSym(MPlexSym< T, D, N > &A, double *determ=nullptr)
Definition: MatriplexSym.h:421
void kalmanUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
void propagateHelixToZMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexQF &msZ, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr=nullptr)
void kalmanComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
void propagateHelixToPlaneMPlex(const MPlexLS &inErr, const MPlexLV &inPar, const MPlexQI &inChg, const MPlexHV &plPnt, const MPlexHV &plNrm, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &pflags, const MPlexQI *noMatEffPtr=nullptr)
T sqrt(T t)
Definition: SSEVec.h:23
constexpr Matriplex::idx_t NN
Definition: Matrix.h:48
Cos< T >::type cos(const T &t)
Definition: Cos.h:22
constexpr float Bfield
Definition: Config.h:60
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
void kalmanPropagateAndUpdateEndcap(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
void kalmanPropagateAndComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
double f[11][100]
void kalmanOperationPlaneLocal(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, HH, 2, NN > MPlexH2
Definition: Matrix.h:77
d
Definition: ztail.py:151
Matriplex::Matriplex< int, 1, 1, NN > MPlexQI
Definition: Matrix.h:81
Matriplex::Matriplex< float, 5, 6, NN > MPlex56
Definition: Matrix.h:65
void kalmanPropagateAndComputeChi2Endcap(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::Matriplex< float, 2, HH, NN > MPlex2H
Definition: Matrix.h:78
constexpr float sol
Definition: Config.h:13
Matriplex::Matriplex< float, 1, 1, NN > MPlexQF
Definition: Matrix.h:80
#define N
Definition: blowfish.cc:9
DecomposeProduct< arg, typename Div::arg > D
Definition: Factorize.h:141
Matriplex::MatriplexSym< float, 2, NN > MPlex2S
Definition: Matrix.h:70
Matriplex::Matriplex< float, 5, 2, NN > MPlex52
Definition: Matrix.h:75
double b
Definition: hdecay.h:120
float getPhi(float x, float y)
Definition: Hit.h:34
Matriplex::MatriplexSym< float, HH, NN > MPlexHS
Definition: Matrix.h:59
Matriplex::Matriplex< float, LL, HH, NN > MPlexLH
Definition: Matrix.h:72
void kalmanOperationPlane(const int kfOp, const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexLS &outErr, MPlexLV &outPar, MPlexQF &outChi2, const int N_proc)
Matriplex::Matriplex< float, 2, 1, NN > MPlex2V
Definition: Matrix.h:69
void kalmanUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexLS &outErr, MPlexLV &outPar, const int N_proc)
double a
Definition: hdecay.h:121
void kalmanPropagateAndComputeChi2Plane(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexQF &outChi2, MPlexLV &propPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)
Matriplex::MatriplexSym< float, LL, NN > MPlexLS
Definition: Matrix.h:55
void kalmanComputeChi2(const MPlexLS &psErr, const MPlexLV &psPar, const MPlexQI &inChg, const MPlexHS &msErr, const MPlexHV &msPar, MPlexQF &outChi2, const int N_proc)
Definition: APVGainStruct.h:7
long double T
Matriplex::Matriplex< float, 5, 1, NN > MPlex5V
Definition: Matrix.h:61
#define ASSUME_ALIGNED(a, b)
Matriplex::Matriplex< float, 6, 5, NN > MPlex65
Definition: Matrix.h:66
void kalmanPropagateAndUpdatePlane(const MPlexLS &psErr, const MPlexLV &psPar, MPlexQI &Chg, const MPlexHS &msErr, const MPlexHV &msPar, const MPlexHV &plNrm, const MPlexHV &plDir, const MPlexHV &plPnt, MPlexLS &outErr, MPlexLV &outPar, MPlexQI &outFailFlag, const int N_proc, const PropagationFlags &propFlags, const bool propToHit)