1 #ifndef RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h 2 #define RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h 25 ALPAKA_FN_ACC ALPAKA_FN_INLINE
bool use_sample(
unsigned int sample_mask,
unsigned int sample) {
36 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
40 bool*
const useless_sample_values,
44 uint32_t
const nchannels)
const {
48 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
51 auto* s_sum0 = alpaka::getDynSharedMem<char>(acc);
52 auto* s_sum1 =
reinterpret_cast<ScalarType*
>(s_sum0 + elemsPerBlock);
53 auto* s_sumA = s_sum1 + elemsPerBlock;
54 auto* s_sumAA = s_sumA + elemsPerBlock;
58 auto tx = nchannels * nsamples - 1 - txforward;
59 auto const ch = tx / nsamples;
61 auto const sample = tx % nsamples;
62 auto const ltx = tx % elemsPerBlock;
65 auto const inv_error =
66 useless_sample_values[tx] ? 0. : 1. / (sample_value_errors[tx] * sample_value_errors[tx]);
67 auto const sample_value = sample_values[tx];
68 s_sum0[ltx] = useless_sample_values[tx] ? 0 : 1;
69 s_sum1[ltx] = inv_error;
70 s_sumA[ltx] = sample_value * inv_error;
71 s_sumAA[ltx] = sample_value * sample_value * inv_error;
72 alpaka::syncBlockThreads(acc);
76 s_sum0[ltx] += s_sum0[ltx + 5];
77 s_sum1[ltx] += s_sum1[ltx + 5];
78 s_sumA[ltx] += s_sumA[ltx + 5];
79 s_sumAA[ltx] += s_sumAA[ltx + 5];
81 alpaka::syncBlockThreads(acc);
85 s_sum0[ltx] += s_sum0[ltx + 2] + s_sum0[ltx + 3];
86 s_sum1[ltx] += s_sum1[ltx + 2] + s_sum1[ltx + 3];
87 s_sumA[ltx] += s_sumA[ltx + 2] + s_sumA[ltx + 3];
88 s_sumAA[ltx] += s_sumAA[ltx + 2] + s_sumAA[ltx + 3];
90 alpaka::syncBlockThreads(acc);
94 auto const sum0 = s_sum0[ltx] + s_sum0[ltx + 1] - s_sum0[ltx + 3];
95 auto const sum1 = s_sum1[ltx] + s_sum1[ltx + 1] - s_sum1[ltx + 3];
96 auto const sumA = s_sumA[ltx] + s_sumA[ltx + 1] - s_sumA[ltx + 3];
97 auto const sumAA = s_sumAA[ltx] + s_sumAA[ltx + 1] - s_sumAA[ltx + 3];
98 auto const chi2 = sum0 > 0 ? (sumAA - sumA * sumA / sum1) / sum0 : static_cast<ScalarType>(0);
103 #ifdef DEBUG_TC_NULLHYPOT 105 printf(
"chi2 = %f sum0 = %d sumAA = %f\n",
chi2, static_cast<int>(sum0), sumAA);
123 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
125 EcalDigiDeviceCollection::ConstView digisDevEB,
126 EcalDigiDeviceCollection::ConstView digisDevEE,
129 bool*
const useless_sample_values,
130 char*
const pedestal_nums,
138 EcalMultifitParametersDevice::ConstView paramsDev,
144 constexpr uint32_t nchannels_per_block = 10;
145 constexpr auto nthreads_per_channel = nchannels_per_block * (nchannels_per_block - 1) / 2;
147 auto const nchannels = digisDevEB.size() + digisDevEE.size();
148 auto const offsetForInputs = digisDevEB.size();
149 auto const totalElements = nthreads_per_channel * nchannels;
151 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
154 auto* shr_chi2s = alpaka::getDynSharedMem<ScalarType>(acc);
155 auto* shr_time_wgt = shr_chi2s + elemsPerBlock;
156 auto* shr_time_max = shr_time_wgt + elemsPerBlock;
157 auto* shrTimeMax = shr_time_max + elemsPerBlock;
158 auto* shrTimeWgt = shrTimeMax + elemsPerBlock;
159 auto* shr_chi2 = shrTimeWgt + elemsPerBlock;
160 auto* shr_tmax = shr_chi2 + elemsPerBlock;
161 auto* shr_tmaxerr = shr_tmax + elemsPerBlock;
162 auto* shr_condForUselessSamples =
reinterpret_cast<bool*
>(shr_tmaxerr + elemsPerBlock);
163 auto* shr_internalCondForSkipping1 = shr_condForUselessSamples + elemsPerBlock;
164 auto* shr_internalCondForSkipping2 = shr_internalCondForSkipping1 + elemsPerBlock;
168 auto const ch =
idx.global / nthreads_per_channel;
169 auto const ltx =
idx.global % nthreads_per_channel;
171 auto const ch_start = ch * nsamples;
172 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
173 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
175 auto const did =
DetId{dids[inputCh]};
177 auto*
const amplitudeFitParameters =
178 isBarrel ? paramsDev.amplitudeFitParamsEB().data() : paramsDev.amplitudeFitParamsEE().data();
179 auto*
const timeFitParameters =
180 isBarrel ? paramsDev.timeFitParamsEB().data() : paramsDev.timeFitParamsEE().data();
181 auto const timeFitParameters_size =
182 isBarrel ? paramsDev.timeFitParamsEB().size() : paramsDev.timeFitParamsEE().size();
183 auto const timeFitLimits_first =
isBarrel ? timeFitLimits_firstEB : timeFitLimits_firstEE;
184 auto const timeFitLimits_second =
isBarrel ? timeFitLimits_secondEB : timeFitLimits_secondEE;
192 }
else if (ltx <= 16) {
194 sample_j = 2 + ltx - 9;
195 }
else if (ltx <= 23) {
197 sample_j = 3 + ltx - 17;
198 }
else if (ltx <= 29) {
200 sample_j = 4 + ltx - 24;
201 }
else if (ltx <= 34) {
203 sample_j = 5 + ltx - 30;
204 }
else if (ltx <= 38) {
206 sample_j = 6 + ltx - 35;
207 }
else if (ltx <= 41) {
209 sample_j = 7 + ltx - 39;
210 }
else if (ltx <= 43) {
212 sample_j = 8 + ltx - 42;
213 }
else if (ltx <= 44) {
221 auto const tx_i = ch_start + sample_i;
222 auto const tx_j = ch_start + sample_j;
229 bool const condForUselessSamples = useless_sample_values[tx_i] || useless_sample_values[tx_j] ||
230 sample_values[tx_i] <= 1 || sample_values[tx_j] <= 1;
238 shrTimeMax[
idx.local] = 0;
239 shrTimeWgt[
idx.local] = 0;
241 bool internalCondForSkipping1 =
true;
242 bool internalCondForSkipping2 =
true;
243 if (!condForUselessSamples) {
244 auto const rtmp = sample_values[tx_i] / sample_values[tx_j];
245 auto const invampl_i = 1. / sample_values[tx_i];
246 auto const relErr2_i = sample_value_errors[tx_i] * sample_value_errors[tx_i] * invampl_i * invampl_i;
247 auto const invampl_j = 1. / sample_values[tx_j];
248 auto const relErr2_j = sample_value_errors[tx_j] * sample_value_errors[tx_j] * invampl_j * invampl_j;
249 auto const err1 = rtmp * rtmp * (relErr2_i + relErr2_j);
251 sample_value_errors[tx_j] * (sample_values[tx_i] - sample_values[tx_j]) * (invampl_j * invampl_j);
255 if (pedestal_nums[ch] == 2)
257 auto const err3 = (0.289 * 0.289) * (invampl_j * invampl_j);
258 auto const total_error =
std::sqrt(err1 + err2 + err3);
260 auto const alpha = amplitudeFitParameters[0];
261 auto const beta = amplitudeFitParameters[1];
263 auto const invalphabeta = 1. / alphabeta;
266 auto const ratio_index = sample_i;
267 auto const ratio_step = sample_j - sample_i;
268 auto const ratio_value = rtmp;
269 auto const ratio_error = total_error;
271 auto const rlim_i_j =
fast_expf(static_cast<ScalarType>(sample_j - sample_i) /
beta) - 0.001;
272 internalCondForSkipping1 = !(total_error < 1. && rtmp > 0.001 && rtmp < rlim_i_j);
273 if (!internalCondForSkipping1) {
280 auto const l_timeFitLimits_first = timeFitLimits_first;
281 auto const l_timeFitLimits_second = timeFitLimits_second;
282 if (ratio_step == 1 && ratio_value >= l_timeFitLimits_first && ratio_value <= l_timeFitLimits_second) {
283 auto const time_max_i =
static_cast<ScalarType>(ratio_index);
284 auto u = timeFitParameters[timeFitParameters_size - 1];
286 for (
int k = timeFitParameters_size - 2;
k >= 0; --
k)
287 u = u * ratio_value + timeFitParameters[
k];
289 auto du = (timeFitParameters_size - 1) * (timeFitParameters[timeFitParameters_size - 1]);
290 for (
int k = timeFitParameters_size - 2;
k >= 1; --
k)
291 du = du * ratio_value +
k * timeFitParameters[
k];
293 auto const error2 = ratio_error * ratio_error * du * du;
294 auto const time_max = error2 > 0 ? (time_max_i - u) / error2 : static_cast<ScalarType>(0);
295 auto const time_wgt = error2 > 0 ? 1. / error2 :
static_cast<ScalarType>(0);
300 shrTimeMax[
idx.local] = error2 > 0 ? time_max : 0;
301 shrTimeWgt[
idx.local] = error2 > 0 ? time_wgt : 0;
303 shrTimeMax[
idx.local] = 0;
304 shrTimeWgt[
idx.local] = 0;
308 auto const stepOverBeta =
static_cast<ScalarType>(ratio_step) /
beta;
310 auto const rmin =
std::max(ratio_value - ratio_error, 0.001);
317 tmax = 0.5 * (time1 + time2);
318 tmaxerr = 0.5 *
std::sqrt((time1 - time2) * (time1 - time2));
319 #ifdef DEBUG_TC_MAKERATIO 320 if (ch == 1 || ch == 0)
322 "ch = %d ltx = %d tmax = %f tmaxerr = %f time1 = %f time2 = %f offset = %f rmin = %f rmax = " 337 const int itmin =
std::max(-1, static_cast<int>(std::floor(
tmax - alphabeta)));
338 auto loffset = (
static_cast<ScalarType>(itmin) -
tmax) * invalphabeta;
340 for (
int it = itmin + 1;
it < nsamples; ++
it) {
341 loffset += invalphabeta;
342 if (useless_sample_values[ch_start +
it])
344 auto const inverr2 = 1. / (sample_value_errors[ch_start +
it] * sample_value_errors[ch_start +
it]);
345 auto const term1 = 1. + loffset;
347 sumAf += sample_values[ch_start +
it] * (
f * inverr2);
348 sumff +=
f * (
f * inverr2);
351 auto const sumAA = sumAAsNullHypot[ch];
352 auto const sum0 = sum0sNullHypot[ch];
356 chi2 = sumAA - sumAf * (sumAf / sumff);
360 #ifdef DEBUG_TC_MAKERATIO 361 if (ch == 1 || ch == 0)
363 "ch = %d ltx = %d sumAf = %f sumff = %f sumAA = %f sum0 = %d tmax = %f tmaxerr = %f chi2 = " 370 static_cast<int>(sum0),
376 if (
chi2 > 0 &&
tmax > 0 && tmaxerr > 0)
377 internalCondForSkipping2 =
false;
387 shr_tmaxerr[
idx.local] = tmaxerr;
388 shr_condForUselessSamples[
idx.local] = condForUselessSamples;
389 shr_internalCondForSkipping1[
idx.local] = internalCondForSkipping1;
390 shr_internalCondForSkipping2[
idx.local] = internalCondForSkipping2;
393 alpaka::syncBlockThreads(acc);
397 auto iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
398 bool oddElements = nthreads_per_channel % 2;
402 auto const ltx =
idx.global % nthreads_per_channel;
404 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
407 shr_chi2s[
idx.local] =
std::min(shr_chi2s[
idx.local], shr_chi2s[
idx.local + iter]);
410 alpaka::syncBlockThreads(acc);
412 oddElements = iter % 2;
413 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
417 auto const ltx =
idx.global % nthreads_per_channel;
420 auto const condForUselessSamples = shr_condForUselessSamples[
idx.local];
421 auto const internalCondForSkipping1 = shr_internalCondForSkipping1[
idx.local];
422 auto const internalCondForSkipping2 = shr_internalCondForSkipping2[
idx.local];
424 if (!condForUselessSamples && !internalCondForSkipping1 && !internalCondForSkipping2) {
427 auto const chi2 = shr_chi2[
idx.local];
428 auto const chi2min = shr_chi2s[
idx.local - ltx];
429 auto const chi2Limit = chi2min + 1.;
430 auto const tmaxerr = shr_tmaxerr[
idx.local];
431 auto const inverseSigmaSquared =
chi2 < chi2Limit ? 1. / (tmaxerr * tmaxerr) : 0.;
433 #ifdef DEBUG_TC_MAKERATIO 434 if (ch == 1 || ch == 0) {
435 auto const ch =
idx.global / nthreads_per_channel;
436 printf(
"ch = %d ltx = %d chi2min = %f chi2Limit = %f inverseSigmaSquared = %f\n",
441 inverseSigmaSquared);
448 auto const tmax = shr_tmax[
idx.local];
449 shr_time_wgt[
idx.local] = inverseSigmaSquared;
450 shr_time_max[
idx.local] =
tmax * inverseSigmaSquared;
452 shr_time_wgt[
idx.local] = 0;
453 shr_time_max[
idx.local] = 0;
457 alpaka::syncBlockThreads(acc);
460 iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
461 oddElements = nthreads_per_channel % 2;
465 auto const ltx =
idx.global % nthreads_per_channel;
467 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
468 shr_time_wgt[
idx.local] += shr_time_wgt[
idx.local + iter];
469 shr_time_max[
idx.local] += shr_time_max[
idx.local + iter];
470 shrTimeMax[
idx.local] += shrTimeMax[
idx.local + iter];
471 shrTimeWgt[
idx.local] += shrTimeWgt[
idx.local + iter];
475 alpaka::syncBlockThreads(acc);
476 oddElements = iter % 2;
477 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
481 auto const ltx =
idx.global % nthreads_per_channel;
487 auto const ch =
idx.global / nthreads_per_channel;
488 auto const tmp_time_max = shr_time_max[
idx.local];
489 auto const tmp_time_wgt = shr_time_wgt[
idx.local];
492 if (tmp_time_wgt == 0 && tmp_time_max == 0) {
498 auto const tMaxAlphaBeta = tmp_time_max / tmp_time_wgt;
499 auto const tMaxErrorAlphaBeta = 1. /
std::sqrt(tmp_time_wgt);
501 tMaxAlphaBetas[ch] = tMaxAlphaBeta;
502 tMaxErrorAlphaBetas[ch] = tMaxErrorAlphaBeta;
503 g_accTimeMax[ch] = shrTimeMax[
idx.local];
504 g_accTimeWgt[ch] = shrTimeWgt[
idx.local];
507 #ifdef DEBUG_TC_MAKERATIO 508 printf(
"ch = %d time_max = %f time_wgt = %f\n", ch, tmp_time_max, tmp_time_wgt);
509 printf(
"ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f timeMax = %f timeWgt = %f\n",
513 shrTimeMax[
idx.local],
514 shrTimeWgt[
idx.local]);
526 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
528 EcalDigiDeviceCollection::ConstView digisDevEB,
529 EcalDigiDeviceCollection::ConstView digisDevEE,
532 bool*
const useless_samples,
545 EcalMultifitParametersDevice::ConstView paramsDev)
const {
553 auto const nchannels = digisDevEB.size() + digisDevEE.size();
554 auto const offsetForInputs = digisDevEB.size();
556 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
561 auto* shr_sumAf = alpaka::getDynSharedMem<ScalarType>(acc);
562 auto* shr_sumff = shr_sumAf + elemsPerBlock;
566 auto gtx = nchannels * nsamples - 1 - gtxforward;
567 auto const ch = gtx / nsamples;
568 auto const elemIdx = gtx % elemsPerBlock;
569 auto const sample = elemIdx % nsamples;
571 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
572 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
574 auto state = g_state[ch];
575 auto const did =
DetId{dids[inputCh]};
576 auto*
const amplitudeFitParameters = did.
subdetId() ==
EcalBarrel ? paramsDev.amplitudeFitParamsEB().data()
577 : paramsDev.amplitudeFitParamsEE().data();
582 auto const alpha = amplitudeFitParameters[0];
583 auto const beta = amplitudeFitParameters[1];
585 auto const invalphabeta = 1. / alphabeta;
586 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
587 auto const sample_value = sample_values[gtx];
588 auto const sample_value_error = sample_value_errors[gtx];
590 useless_samples[gtx] ?
static_cast<ScalarType>(0) : 1. / (sample_value_error * sample_value_error);
592 auto const term1 = 1. +
offset;
594 auto const sumAf = sample_value * (
f * inverr2);
595 auto const sumff =
f * (
f * inverr2);
598 shr_sumAf[elemIdx] = sumAf;
599 shr_sumff[elemIdx] = sumff;
601 shr_sumAf[elemIdx] = 0;
602 shr_sumff[elemIdx] = 0;
605 alpaka::syncBlockThreads(acc);
610 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 5];
611 shr_sumff[elemIdx] += shr_sumff[elemIdx + 5];
614 alpaka::syncBlockThreads(acc);
618 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 2] + shr_sumAf[elemIdx + 3];
619 shr_sumff[elemIdx] += shr_sumff[elemIdx + 2] + shr_sumff[elemIdx + 3];
622 alpaka::syncBlockThreads(acc);
629 g_timeError[ch] = -999;
634 auto const sumff = shr_sumff[elemIdx] + shr_sumff[elemIdx + 1] - shr_sumff[elemIdx + 3];
635 auto const sumAf = shr_sumAf[elemIdx] + shr_sumAf[elemIdx + 1] - shr_sumAf[elemIdx + 3];
637 auto const ampMaxAlphaBeta = sumff > 0 ? sumAf / sumff : 0;
638 auto const sumAA = sumAAsNullHypot[ch];
639 auto const sum0 = sum0sNullHypot[ch];
640 auto const nullChi2 = chi2sNullHypot[ch];
642 auto const chi2AlphaBeta = (sumAA - sumAf * sumAf / sumff) / sum0;
643 if (chi2AlphaBeta > nullChi2) {
646 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 647 printf(
"ch = %d chi2AlphaBeta = %f nullChi2 = %f sumAA = %f sumAf = %f sumff = %f sum0 = %f\n",
659 g_ampMaxAlphaBeta[ch] = ampMaxAlphaBeta;
661 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 662 printf(
"ch = %d sum0 = %f sumAA = %f sumff = %f sumAf = %f\n", ch, sum0, sumAA, sumff, sumAf);
672 g_timeError[ch] = -999;
673 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 674 printf(
"ch = %d finished state\n", ch);
679 auto const ampMaxError = g_ampMaxError[ch];
680 auto const test_ratio = ampMaxAlphaBeta / ampMaxError;
681 auto const accTimeMax = g_accTimeMax[ch];
682 auto const accTimeWgt = g_accTimeWgt[ch];
683 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
684 auto const tMaxErrorAlphaBeta = g_tMaxErrorAlphaBeta[ch];
687 if (test_ratio > 5. && accTimeWgt > 0) {
688 auto const tMaxRatio = accTimeWgt > 0 ? accTimeMax / accTimeWgt :
static_cast<ScalarType>(0);
689 auto const tMaxErrorRatio = accTimeWgt > 0 ? 1. /
std::sqrt(accTimeWgt) :
static_cast<ScalarType>(0);
691 if (test_ratio > 10.) {
692 g_timeMax[ch] = tMaxRatio;
693 g_timeError[ch] = tMaxErrorRatio;
695 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 696 printf(
"ch = %d tMaxRatio = %f tMaxErrorRatio = %f\n", ch, tMaxRatio, tMaxErrorRatio);
699 auto const timeMax = (tMaxAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
700 tMaxRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
702 auto const timeError = (tMaxErrorAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
703 tMaxErrorRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
708 g_timeError[ch] = timeError;
710 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 711 printf(
"ch = %d timeMax = %f timeError = %f\n", ch,
timeMax, timeError);
717 g_timeMax[ch] = tMaxAlphaBeta;
718 g_timeError[ch] = tMaxErrorAlphaBeta;
720 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH 721 printf(
"ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f\n", ch, tMaxAlphaBeta, tMaxErrorAlphaBeta);
733 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
735 EcalDigiDeviceCollection::ConstView digisDevEB,
736 EcalDigiDeviceCollection::ConstView digisDevEE,
737 EcalMultifitConditionsDevice::ConstView conditionsDev,
740 bool* useless_sample_values)
const {
744 auto const nchannelsEB = digisDevEB.size();
745 auto const offsetForInputs = nchannelsEB;
747 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
750 auto const elemIdx = gtx % elemsPerBlock;
751 auto const sample = elemIdx % nsamples;
752 auto const ch = gtx / nsamples;
761 int const inputGtx = ch >= offsetForInputs ? gtx - offsetForInputs * nsamples : gtx;
762 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
766 if (gainIdPrev >= 1 && gainIdPrev <= 3 && gainIdNext >= 1 && gainIdNext <= 3 && gainIdPrev < gainIdNext) {
767 sample_values[gtx - 1] = 0;
768 sample_value_errors[gtx - 1] = 1
e+9;
769 useless_sample_values[gtx - 1] =
true;
780 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
782 EcalDigiDeviceCollection::ConstView digisDevEB,
783 EcalDigiDeviceCollection::ConstView digisDevEE,
784 EcalMultifitConditionsDevice::ConstView conditionsDev,
788 bool* useless_sample_values,
789 char* pedestal_nums)
const {
794 auto const nchannelsEB = digisDevEB.size();
795 auto const nchannels = nchannelsEB + digisDevEE.size();
796 auto const offsetForInputs = nchannelsEB;
797 auto const offsetForHashes = conditionsDev.offsetEE();
799 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
802 auto* shrSampleValues = alpaka::getDynSharedMem<ScalarType>(acc);
803 auto* shrSampleValueErrors = shrSampleValues + elemsPerBlock;
807 auto tx = nchannels * nsamples - 1 - txforward;
808 auto const ch = tx / nsamples;
809 auto const elemIdx = tx % elemsPerBlock;
811 int const inputTx = ch >= offsetForInputs ? tx - offsetForInputs * nsamples : tx;
812 int const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
813 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
814 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
817 auto const sample = tx % nsamples;
818 auto const input_ch_start = inputCh * nsamples;
827 auto const did =
DetId{dids[inputCh]};
829 auto const sample_mask =
isBarrel ? conditionsDev.sampleMask_EB() : conditionsDev.sampleMask_EE();
835 if (gainId0 == 1 &&
use_sample(sample_mask, 0)) {
839 auto const diff = adc1 - adc0;
840 if (gainId1 == 1 &&
use_sample(sample_mask, 1) &&
841 std::abs(
diff) < 3 * conditionsDev.pedestals_rms_x12()[hashedId]) {
846 pedestal = conditionsDev.pedestals_mean_x12()[ch];
861 sample_value_error = 0;
864 sample_value_error = conditionsDev.pedestals_rms_x12()[hashedId];
866 auto const mean_x6 = conditionsDev.pedestals_mean_x6()[hashedId];
867 auto const rms_x6 = conditionsDev.pedestals_rms_x6()[hashedId];
868 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
869 sample_value = (
static_cast<ScalarType>(
adc) - mean_x6) * gain12Over6;
870 sample_value_error = rms_x6 * gain12Over6;
872 auto const mean_x1 = conditionsDev.pedestals_mean_x1()[hashedId];
873 auto const rms_x1 = conditionsDev.pedestals_rms_x1()[hashedId];
874 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
875 auto const gain6Over1 = conditionsDev.gain6Over1()[hashedId];
876 sample_value = (
static_cast<ScalarType>(
adc) - mean_x1) * gain6Over1 * gain12Over6;
877 sample_value_error = rms_x1 * gain6Over1 * gain12Over6;
880 sample_value_error = 0;
885 auto const useless_sample = (sample_value_error <= 0) | bad;
886 useless_sample_values[tx] = useless_sample;
887 sample_values[tx] = sample_value;
888 sample_value_errors[tx] = useless_sample ? 1
e+9 : sample_value_error;
891 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG 893 printf(
"sample = %d sample_value = %f sample_value_error = %f useless = %c\n",
897 useless_sample ?
'1' :
'0');
903 shrSampleValueErrors[elemIdx] = sample_value_error;
904 alpaka::syncBlockThreads(acc);
909 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 5]
910 ? shrSampleValueErrors[elemIdx + 5]
911 : shrSampleValueErrors[elemIdx];
912 shrSampleValues[elemIdx] =
std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 5]);
914 alpaka::syncBlockThreads(acc);
918 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 3]
919 ? shrSampleValueErrors[elemIdx + 3]
920 : shrSampleValueErrors[elemIdx];
921 shrSampleValues[elemIdx] =
std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 3]);
923 alpaka::syncBlockThreads(acc);
926 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 2]
927 ? shrSampleValueErrors[elemIdx + 2]
928 : shrSampleValueErrors[elemIdx];
929 shrSampleValues[elemIdx] =
std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 2]);
931 alpaka::syncBlockThreads(acc);
935 auto const maxSampleValueError = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 1]
936 ? shrSampleValueErrors[elemIdx + 1]
937 : shrSampleValueErrors[elemIdx];
940 pedestal_nums[ch] =
num;
942 ampMaxError[ch] = maxSampleValueError;
945 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG 947 printf(
"pedestal_nums = %d ampMaxError = %f\n",
num, maxSampleValueError);
963 template <
typename TAcc,
typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
965 EcalDigiDeviceCollection::ConstView digisDevEB,
966 EcalDigiDeviceCollection::ConstView digisDevEE,
969 EcalMultifitConditionsDevice::ConstView conditionsDev,
988 auto const nchannelsEB = digisDevEB.size();
989 auto const nchannels = nchannelsEB + digisDevEE.size();
990 auto const offsetForInputs = nchannelsEB;
991 auto const offsetForHashes = conditionsDev.offsetEE();
994 const int inputGtx = gtx >= offsetForInputs ? gtx - offsetForInputs : gtx;
995 auto const* dids = gtx >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
996 auto const* digis = gtx >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
998 auto* g_amplitude = gtx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
999 auto* g_jitter = gtx >= nchannelsEB ? uncalibRecHitsEE.jitter() : uncalibRecHitsEB.jitter();
1000 auto* g_jitterError = gtx >= nchannelsEB ? uncalibRecHitsEE.jitterError() : uncalibRecHitsEB.jitterError();
1001 auto*
flags = gtx >= nchannelsEB ? uncalibRecHitsEE.flags() : uncalibRecHitsEB.flags();
1003 auto const did =
DetId{dids[inputGtx]};
1008 auto*
const amplitudeBins =
isBarrel ? conditionsDev.timeBiasCorrections_amplitude_EB().data()
1009 : conditionsDev.timeBiasCorrections_amplitude_EE().data();
1010 auto*
const shiftBins =
isBarrel ? conditionsDev.timeBiasCorrections_shift_EB().data()
1011 : conditionsDev.timeBiasCorrections_shift_EE().data();
1012 auto const amplitudeBinsSize =
1013 isBarrel ? conditionsDev.timeBiasCorrectionSizeEB() : conditionsDev.timeBiasCorrectionSizeEE();
1014 auto const timeConstantTerm =
isBarrel ? timeConstantTermEB : timeConstantTermEE;
1015 auto const timeNconst =
isBarrel ? timeNconstEB : timeNconstEE;
1016 auto const offsetTimeValue =
isBarrel ? conditionsDev.timeOffset_EB() : conditionsDev.timeOffset_EE();
1018 auto const outOfTimeThreshG12p =
isBarrel ? outOfTimeThreshG12pEB : outOfTimeThreshG12pEE;
1019 auto const outOfTimeThreshG12m =
isBarrel ? outOfTimeThreshG12mEB : outOfTimeThreshG12mEE;
1020 auto const outOfTimeThreshG61p =
isBarrel ? outOfTimeThreshG61pEB : outOfTimeThreshG61pEE;
1021 auto const outOfTimeThreshG61m =
isBarrel ? outOfTimeThreshG61mEB : outOfTimeThreshG61mEE;
1024 auto const amplitude = g_amplitude[inputGtx];
1025 auto const rms_x12 = conditionsDev.pedestals_rms_x12()[hashedId];
1026 auto const timeCalibConst = conditionsDev.timeCalibConstants()[hashedId];
1029 for (
size_t bin = 0;
bin < amplitudeBinsSize; ++
bin) {
1039 }
else if (myBin == static_cast<int>(amplitudeBinsSize) - 1) {
1042 correction = shiftBins[myBin + 1] - shiftBins[myBin];
1043 correction *= (
amplitude - amplitudeBins[myBin]) / (amplitudeBins[myBin + 1] - amplitudeBins[myBin]);
1049 auto const timeMax = g_timeMax[gtx];
1050 auto const timeError = g_timeError[gtx];
1052 auto const jitterError =
1053 std::sqrt(timeError * timeError + timeConstantTerm * timeConstantTerm * 0.04 * 0.04);
1055 #ifdef DEBUG_TIME_CORRECTION 1056 printf(
"ch = %d timeMax = %f timeError = %f jitter = %f correction = %f\n",
1065 g_jitter[inputGtx] = jitter;
1066 g_jitterError[inputGtx] = jitterError;
1072 auto threshP = outOfTimeThreshG12p;
1073 auto threshM = outOfTimeThreshG12m;
1075 for (
int isample = 0; isample < nsamples; isample++) {
1076 auto const gainid =
ecalMGPA::gainId(digis[nsamples * inputGtx + isample]);
1078 threshP = outOfTimeThreshG61p;
1079 threshM = outOfTimeThreshG61m;
1085 auto const correctedTime = (
timeMax - 5) * 25 + timeCalibConst + offsetTimeValue;
1086 auto const nterm = timeNconst * rms_x12 /
amplitude;
1087 auto const sigmat =
std::sqrt(nterm * nterm + timeConstantTerm * timeConstantTerm);
1088 if (correctedTime > sigmat * threshP || correctedTime < -sigmat * threshM)
1101 template <
typename TAcc>
1104 template <
typename TVec,
typename... TArgs>
1106 TVec
const& threadsPerBlock,
1107 TVec
const& elemsPerThread,
1108 TArgs
const&...) -> std::size_t {
1112 std::size_t bytes = threadsPerBlock[0u] * elemsPerThread[0u] * 4 *
sizeof(
ScalarType);
1118 template <
typename TAcc>
1120 template <
typename TVec,
typename... TArgs>
1122 TVec
const& threadsPerBlock,
1123 TVec
const& elemsPerThread,
1124 TArgs
const&...) -> std::size_t {
1127 std::size_t bytes = (8 *
sizeof(
ScalarType) + 3 *
sizeof(
bool)) * threadsPerBlock[0u] * elemsPerThread[0u];
1133 template <
typename TAcc>
1135 template <
typename TVec,
typename... TArgs>
1137 TVec
const& threadsPerBlock,
1138 TVec
const& elemsPerThread,
1139 TArgs
const&...) -> std::size_t {
1142 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
sizeof(
ScalarType);
1148 template <
typename TAcc>
1150 template <
typename TVec,
typename... TArgs>
1152 TVec
const& threadsPerBlock,
1153 TVec
const& elemsPerThread,
1154 TArgs
const&...) -> std::size_t {
1157 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
sizeof(
ScalarType);
1164 #endif // RecoLocalCalo_EcalRecProducers_plugins_TimeComputationKernels_h
ALPAKA_FN_ACC void operator()(TAcc const &acc, EcalDigiDeviceCollection::ConstView digisDevEB, EcalDigiDeviceCollection::ConstView digisDevEE, ScalarType *const sample_values, ScalarType *const sample_value_errors, bool *const useless_sample_values, char *const pedestal_nums, ScalarType *const sumAAsNullHypot, ScalarType *const sum0sNullHypot, ScalarType *tMaxAlphaBetas, ScalarType *tMaxErrorAlphaBetas, ScalarType *g_accTimeMax, ScalarType *g_accTimeWgt, TimeComputationState *g_state, EcalMultifitParametersDevice::ConstView paramsDev, ConfigurationParameters::type const timeFitLimits_firstEB, ConfigurationParameters::type const timeFitLimits_firstEE, ConfigurationParameters::type const timeFitLimits_secondEB, ConfigurationParameters::type const timeFitLimits_secondEE) const
ALPAKA_FN_ACC constexpr float fast_logf(float x)
::ecal::multifit::SampleVector::Scalar ScalarType
::ecal::multifit::SampleVector::Scalar ScalarType
ALPAKA_FN_ACC uint32_t hashedIndexEB(uint32_t id)
ALPAKA_FN_ACC constexpr float fast_expf(float x)
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_time_computation_init const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
ALPAKA_FN_ACC uint32_t hashedIndexEE(uint32_t id)
::ecal::multifit::SampleVector::Scalar ScalarType
Abs< T >::type abs(const T &t)
constexpr int subdetId() const
get the contents of the subdetector field (not cast into any detector's numbering enum) ...
constexpr int adc(sample_type sample)
get the ADC sample (12 bits)
ALPAKA_FN_ACC void operator()(TAcc const &acc, EcalDigiDeviceCollection::ConstView digisDevEB, EcalDigiDeviceCollection::ConstView digisDevEE, EcalMultifitConditionsDevice::ConstView conditionsDev, ScalarType *sample_values, ScalarType *sample_value_errors, ScalarType *ampMaxError, bool *useless_sample_values, char *pedestal_nums) const
ALPAKA_FN_ACC void operator()(TAcc const &acc, ScalarType *const sample_values, ScalarType *const sample_value_errors, bool *const useless_sample_values, ScalarType *chi2s, ScalarType *sum0s, ScalarType *sumAAs, uint32_t const nchannels) const
static const double tmax[3]
ALPAKA_FN_ACC void operator()(TAcc const &acc, EcalDigiDeviceCollection::ConstView digisDevEB, EcalDigiDeviceCollection::ConstView digisDevEE, EcalMultifitConditionsDevice::ConstView conditionsDev, ScalarType *sample_values, ScalarType *sample_value_errors, bool *useless_sample_values) const
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool use_sample(unsigned int sample_mask, unsigned int sample)
constexpr int gainId(sample_type sample)
get the gainId (2 bits)
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_makeratio const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_nullhypot const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
ALPAKA_FN_ACC void operator()(TAcc const &acc, EcalDigiDeviceCollection::ConstView digisDevEB, EcalDigiDeviceCollection::ConstView digisDevEE, ScalarType *const sample_values, ScalarType *const sample_value_errors, bool *const useless_samples, ScalarType *const g_tMaxAlphaBeta, ScalarType *const g_tMaxErrorAlphaBeta, ScalarType *const g_accTimeMax, ScalarType *const g_accTimeWgt, ScalarType *const sumAAsNullHypot, ScalarType *const sum0sNullHypot, ScalarType *const chi2sNullHypot, TimeComputationState *g_state, ScalarType *g_ampMaxAlphaBeta, ScalarType *g_ampMaxError, ScalarType *g_timeMax, ScalarType *g_timeError, EcalMultifitParametersDevice::ConstView paramsDev) const
::ecal::multifit::SampleVector::Scalar ScalarType
ALPAKA_FN_ACC void operator()(TAcc const &acc, EcalDigiDeviceCollection::ConstView digisDevEB, EcalDigiDeviceCollection::ConstView digisDevEE, EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEB, EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEE, EcalMultifitConditionsDevice::ConstView conditionsDev, ScalarType *const g_timeMax, ScalarType *const g_timeError, ConfigurationParameters::type const timeConstantTermEB, ConfigurationParameters::type const timeConstantTermEE, ConfigurationParameters::type const timeNconstEB, ConfigurationParameters::type const timeNconstEE, ConfigurationParameters::type const amplitudeThresholdEB, ConfigurationParameters::type const amplitudeThresholdEE, ConfigurationParameters::type const outOfTimeThreshG12pEB, ConfigurationParameters::type const outOfTimeThreshG12pEE, ConfigurationParameters::type const outOfTimeThreshG12mEB, ConfigurationParameters::type const outOfTimeThreshG12mEE, ConfigurationParameters::type const outOfTimeThreshG61pEB, ConfigurationParameters::type const outOfTimeThreshG61pEE, ConfigurationParameters::type const outOfTimeThreshG61mEB, ConfigurationParameters::type const outOfTimeThreshG61mEE) const
constexpr int gainId(sample_type sample)
get the gainId (2 bits)
::ecal::multifit::SampleVector::Scalar ScalarType
static constexpr int MAXSAMPLES
ALPAKA_ASSERT_ACC(offsets)
static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_findamplchi2_and_finish const &, TVec const &threadsPerBlock, TVec const &elemsPerThread, TArgs const &...) -> std::size_t
::ecal::multifit::SampleVector::Scalar ScalarType
uint16_t *__restrict__ uint16_t const *__restrict__ adc