CMS 3D CMS Logo

Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
cms::alpakatools Namespace Reference

Namespaces

 config
 
 detail
 
 impl
 
 traits
 

Classes

class  AtomicPairCounter
 
class  CachingAllocator
 
struct  CopyToDevice
 
struct  CopyToDevice< cms::alpakatest::AlpakaESTestDataB< alpaka_common::DevHost > >
 
struct  CopyToDevice< cms::alpakatest::AlpakaESTestDataEHost >
 
struct  CopyToDevice< PixelCPEFastParamsHost< TrackerTraits > >
 
struct  CopyToDevice< PortableHostCollection< TLayout > >
 
struct  CopyToDevice< PortableHostMultiCollection< TDev, T0, Args... > >
 
struct  CopyToDevice< PortableHostObject< TProduct > >
 
struct  CopyToHost
 
struct  CopyToHost< PortableDeviceCollection< TLayout, TDevice > >
 
struct  CopyToHost< PortableDeviceMultiCollection< TDev, T0, Args... > >
 
struct  CopyToHost< PortableDeviceObject< TProduct, TDevice > >
 
struct  CopyToHost< SiPixelClustersDevice< TDevice > >
 
struct  CopyToHost< SiPixelDigiErrorsDevice< TDevice > >
 
struct  CopyToHost< SiPixelDigisDevice< TDevice > >
 
struct  CopyToHost< TrackingRecHitDevice< TrackerTraits, TDevice > >
 
struct  CopyToHost< TracksDevice< TrackerTraits, TDevice > >
 
struct  CopyToHost< ZVertexDevice< TDevice > >
 
struct  countFromVector
 
struct  ElementIndex
 
class  EventCache
 
struct  fillFromVector
 
class  FlexiStorage
 
class  FlexiStorage< I, -1 >
 
class  HistoContainer
 
struct  multiBlockPrefixScan
 
class  OneToManyAssocBase
 
class  OneToManyAssocRandomAccess
 
class  OneToManyAssocSequential
 
class  QueueCache
 
struct  radixSortMultiWrapper
 
struct  radixSortMultiWrapper2
 
struct  requires_single_thread_per_block
 
class  ScopedContextAcquire
 
class  ScopedContextAnalyze
 
class  ScopedContextProduce
 
class  ScopedContextTask
 
struct  SimpleVector
 
class  VecArray
 

Typedefs

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using const_device_buffer = alpaka::ViewConst< device_buffer< TDev, T > >
 
template<typename T >
using const_host_buffer = alpaka::ViewConst< host_buffer< T > >
 
template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using device_buffer = typename detail::buffer_type< TDev, T >::type
 
template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using device_view = typename detail::view_type< TDev, T >::type
 
template<typename T >
using host_buffer = typename detail::buffer_type< DevHost, T >::type
 
template<typename T >
using host_view = typename detail::view_type< DevHost, T >::type
 

Enumerations

enum  AllocatorPolicy { AllocatorPolicy::Synchronous = 0, AllocatorPolicy::Asynchronous = 1, AllocatorPolicy::Caching = 2 }
 
enum  Backend : unsigned short {
  Backend::SerialSync = 0, Backend::CudaAsync = 1, Backend::ROCmAsync = 2, Backend::TbbAsync = 3,
  Backend::size
}
 

Functions

template<typename TElem , typename TIdx , typename TExtent , typename TQueue , typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
ALPAKA_FN_HOST auto allocCachedBuf (TDev const &dev, TQueue queue, TExtent const &extent=TExtent())
 
template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void blockPrefixScan (const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)
 
template<typename TAcc , typename T >
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void blockPrefixScan (const TAcc &acc, T *__restrict__ c, int32_t size, T *__restrict__ ws=nullptr)
 
template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
alpaka::Dev< TPlatform > const & chooseDevice (edm::StreamID id)
 
template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
std::vector< alpaka::Dev< TPlatform > > const & devices ()
 
constexpr Idx divide_up_by (Idx value, Idx divisor)
 
template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void dummyReorder (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)
 
template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void fillManyFromVector (Histo *__restrict__ h, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets, uint32_t totSize, uint32_t nthreads, TQueue &queue)
 
template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void fillManyFromVector (Histo *__restrict__ h, typename Histo::View hv, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets, uint32_t totSize, uint32_t nthreads, TQueue &queue)
 
template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins (Hist const &hist, V value, int n, Func func)
 
template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow (Hist const &hist, V wmin, V wmax, Func const &func)
 
template<typename TDev , typename TQueue , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
CachingAllocator< TDev, TQueue > & getDeviceCachingAllocator (TDev const &device)
 
template<typename Event >
EventCache< Event > & getEventCache ()
 
template<typename TQueue , typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
CachingAllocator< alpaka_common::DevHost, TQueue > & getHostCachingAllocator ()
 
template<typename Queue >
QueueCache< Queue > & getQueueCache ()
 
alpaka::DevCpu const & host ()
 
alpaka::PlatformCpu const & host_platform ()
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto independent_group_elements (TAcc const &acc, TArgs... args)
 
template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto independent_group_elements_along (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto independent_group_elements_x (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto independent_group_elements_y (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto independent_group_elements_z (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto independent_groups (TAcc const &acc, TArgs... args)
 
template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto independent_groups_along (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto independent_groups_x (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto independent_groups_y (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto independent_groups_z (TAcc const &acc, TArgs... args)
 
template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
constexpr bool isPowerOf2 (T v)
 
template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and not std::is_array_v< T >, device_buffer< TDev, T > > make_device_buffer (TDev const &device)
 
template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< TDev, T > > make_device_buffer (TDev const &device, Extent extent)
 
template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< TDev, T > > make_device_buffer (TDev const &device)
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and not std::is_array_v< T >, device_buffer< alpaka::Dev< TQueue >, T > > make_device_buffer (TQueue const &queue)
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< alpaka::Dev< TQueue >, T > > make_device_buffer (TQueue const &queue, Extent extent)
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< alpaka::Dev< TQueue >, T > > make_device_buffer (TQueue const &queue)
 
template<typename T , typename TDev >
std::enable_if_t< not std::is_array_v< T >, device_view< TDev, T > > make_device_view (TDev const &device, T &data)
 
template<typename T , typename TDev >
device_view< TDev, T[]> make_device_view (TDev const &device, T *data, Extent extent)
 
template<typename T , typename TDev >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_view< TDev, T > > make_device_view (TDev const &device, T &data, Extent extent)
 
template<typename T , typename TDev >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_view< TDev, T > > make_device_view (TDev const &device, T &data)
 
template<typename T >
std::enable_if_t< not std::is_array_v< T >, host_buffer< T > > make_host_buffer ()
 
template<typename T >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer (Extent extent)
 
template<typename T >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer ()
 
template<typename T , typename TPlatform >
std::enable_if_t< not std::is_array_v< T >, host_buffer< T > > make_host_buffer ()
 
template<typename T , typename TPlatform >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer (Extent extent)
 
template<typename T , typename TPlatform >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer ()
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and not std::is_array_v< T >, host_buffer< T > > make_host_buffer (TQueue const &queue)
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer (TQueue const &queue, Extent extent)
 
template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > > make_host_buffer (TQueue const &queue)
 
template<typename T >
std::enable_if_t< not std::is_array_v< T >, host_view< T > > make_host_view (T &data)
 
template<typename T >
host_view< T[]> make_host_view (T *data, Extent extent)
 
template<typename T >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_view< T > > make_host_view (T &data, Extent extent)
 
template<typename T >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_view< T > > make_host_view (T &data)
 
template<class T >
SimpleVector< T > make_SimpleVector (int capacity, T *data)
 
template<class T >
SimpleVector< T > * make_SimpleVector (SimpleVector< T > *mem, int capacity, T *data)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
WorkDiv< Dim1D > make_workdiv (Idx blocks, Idx elements)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
WorkDiv< alpaka::Dim< TAcc > > make_workdiv (const Vec< alpaka::Dim< TAcc >> &blocks, const Vec< alpaka::Dim< TAcc >> &elements)
 
void module_backend_config (edm::ConfigurationDescriptions &iDesc)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool once_per_block (TAcc const &acc)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool once_per_grid (TAcc const &acc)
 
template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
TPlatform const & platform ()
 
template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< std::is_unsigned< T >::value &&!requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void radixSort (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)
 
template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_INLINE void radixSort (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)
 
template<typename TAcc , typename T , int NS, typename RF >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void radixSortImpl (const TAcc &acc, T const *__restrict__ a, uint16_t *ind, uint16_t *ind2, uint32_t size, RF reorder)
 
template<typename TAcc , typename T , int NS = sizeof(T)>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void radixSortMulti (const TAcc &acc, T const *v, uint16_t *index, uint32_t const *offsets, uint16_t *workspace)
 
template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void reorderFloat (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)
 
template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void reorderSigned (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)
 
constexpr Idx round_up_by (Idx value, Idx divisor)
 
Backend toBackend (std::string_view name)
 
std::string_view toString (Backend backend)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto uniform_elements (TAcc const &acc, TArgs... args)
 
template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto uniform_elements_along (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto uniform_elements_nd (TAcc const &acc)
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto uniform_elements_nd (TAcc const &acc, alpaka::Vec< alpaka::Dim< TAcc >, Idx > extent)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto uniform_elements_x (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto uniform_elements_y (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto uniform_elements_z (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto uniform_group_elements (TAcc const &acc, TArgs... args)
 
template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto uniform_group_elements_along (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto uniform_group_elements_x (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto uniform_group_elements_y (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto uniform_group_elements_z (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto uniform_groups (TAcc const &acc, TArgs... args)
 
template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto uniform_groups_along (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto uniform_groups_x (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto uniform_groups_y (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto uniform_groups_z (TAcc const &acc, TArgs... args)
 
template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan (const TAcc &acc, int32_t laneId, T const *ci, T *co, uint32_t i, bool active=true)
 
template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan (const TAcc &acc, int32_t laneId, T *c, uint32_t i, bool active=true)
 

Variables

template<typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
constexpr AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous
 
template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
constexpr bool requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value
 

Typedef Documentation

◆ const_device_buffer

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using cms::alpakatools::const_device_buffer = typedef alpaka::ViewConst<device_buffer<TDev, T> >

Definition at line 180 of file memory.h.

◆ const_host_buffer

template<typename T >
using cms::alpakatools::const_host_buffer = typedef alpaka::ViewConst<host_buffer<T> >

Definition at line 60 of file memory.h.

◆ device_buffer

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using cms::alpakatools::device_buffer = typedef typename detail::buffer_type<TDev, T>::type

Definition at line 177 of file memory.h.

◆ device_view

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using cms::alpakatools::device_view = typedef typename detail::view_type<TDev, T>::type

Definition at line 257 of file memory.h.

◆ host_buffer

template<typename T >
using cms::alpakatools::host_buffer = typedef typename detail::buffer_type<DevHost, T>::type

Definition at line 57 of file memory.h.

◆ host_view

template<typename T >
using cms::alpakatools::host_view = typedef typename detail::view_type<DevHost, T>::type

Definition at line 150 of file memory.h.

Enumeration Type Documentation

◆ AllocatorPolicy

◆ Backend

enum cms::alpakatools::Backend : unsigned short
strong
Enumerator
SerialSync 
CudaAsync 
ROCmAsync 
TbbAsync 
size 

Definition at line 8 of file Backend.h.

Function Documentation

◆ allocCachedBuf()

template<typename TElem , typename TIdx , typename TExtent , typename TQueue , typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
ALPAKA_FN_HOST auto cms::alpakatools::allocCachedBuf ( TDev const &  dev,
TQueue  queue,
TExtent const &  extent = TExtent() 
)

Definition at line 197 of file CachedBufAlloc.h.

References createBeamHaloJobs::queue.

197  {
198  return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
199  }
ALPAKA_FN_HOST auto allocCachedBuf(TDev const &dev, TQueue queue, TExtent const &extent=TExtent())

◆ blockPrefixScan() [1/2]

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::blockPrefixScan ( const TAcc &  acc,
T const *  ci,
T *  co,
int32_t  size,
T *  ws = nullptr 
)

Definition at line 47 of file prefixScan.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::co, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), dqmdumpme::first, mps_fire::i, isPowerOf2(), warpPrefixScan(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

Referenced by cms::alpakatools::OneToManyAssocRandomAccess< I, NHISTS *NBINS+1, SIZE >::finalize(), gpuClustering::for(), pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), cms::alpakatools::multiBlockPrefixScan< T >::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::FillHitsModuleStart< TrackerTraits >::operator()().

48  {
49  if constexpr (!requires_single_thread_per_block_v<TAcc>) {
50  const auto warpSize = alpaka::warp::getSize(acc);
51  int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
52  int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
54  ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
55  ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
56  auto first = blockThreadIdx;
57  ALPAKA_ASSERT_ACC(isPowerOf2(warpSize));
58  auto laneId = blockThreadIdx & (warpSize - 1);
59  auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;
60 
61  for (auto i = first; i < warpUpRoundedSize; i += blockDimension) {
62  // When padding the warp, warpPrefixScan is a noop
63  warpPrefixScan(acc, laneId, ci, co, i, i < size);
64  if (i < size) {
65  // Skipped in warp padding threads.
66  auto warpId = i / warpSize;
67  ALPAKA_ASSERT_ACC(warpId < warpSize);
68  if ((warpSize - 1) == laneId)
69  ws[warpId] = co[i];
70  }
71  }
72  alpaka::syncBlockThreads(acc);
73  if (size <= warpSize)
74  return;
75  if (blockThreadIdx < warpSize) {
76  warpPrefixScan(acc, laneId, ws, blockThreadIdx);
77  }
78  alpaka::syncBlockThreads(acc);
79  for (auto i = first + warpSize; i < size; i += blockDimension) {
80  int32_t warpId = i / warpSize;
81  co[i] += ws[warpId - 1];
82  }
83  alpaka::syncBlockThreads(acc);
84  } else {
85  co[0] = ci[0];
86  for (int32_t i = 1; i < size; ++i)
87  co[i] = ci[i] + co[i - 1];
88  }
89  }
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan(const TAcc &acc, int32_t laneId, T *c, uint32_t i, bool active=true)
Definition: prefixScan.h:40
__host__ __device__ VT * co
Definition: prefixScan.h:47
__host__ __device__ VT uint32_t size
Definition: prefixScan.h:47
constexpr bool isPowerOf2(T v)
Definition: prefixScan.h:11

◆ blockPrefixScan() [2/2]

template<typename TAcc , typename T >
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void cms::alpakatools::blockPrefixScan ( const TAcc &  acc,
T *__restrict__  c,
int32_t  size,
T *__restrict__  ws = nullptr 
)

Definition at line 92 of file prefixScan.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), HltBtagPostValidation_cff::c, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), dqmdumpme::first, mps_fire::i, warpPrefixScan(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

95  {
96  if constexpr (!requires_single_thread_per_block_v<TAcc>) {
97  const auto warpSize = alpaka::warp::getSize(acc);
98  int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
99  int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
101  ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
102  ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
103  auto first = blockThreadIdx;
104  auto laneId = blockThreadIdx & (warpSize - 1);
105  auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;
106 
107  for (auto i = first; i < warpUpRoundedSize; i += blockDimension) {
108  // When padding the warp, warpPrefixScan is a noop
109  warpPrefixScan(acc, laneId, c, i, i < size);
110  if (i < size) {
111  // Skipped in warp padding threads.
112  auto warpId = i / warpSize;
113  ALPAKA_ASSERT_ACC(warpId < warpSize);
114  if ((warpSize - 1) == laneId)
115  ws[warpId] = c[i];
116  }
117  }
118  alpaka::syncBlockThreads(acc);
119  if (size <= warpSize)
120  return;
121  if (blockThreadIdx < warpSize) {
122  warpPrefixScan(acc, laneId, ws, blockThreadIdx);
123  }
124  alpaka::syncBlockThreads(acc);
125  for (auto i = first + warpSize; i < size; i += blockDimension) {
126  auto warpId = i / warpSize;
127  c[i] += ws[warpId - 1];
128  }
129  alpaka::syncBlockThreads(acc);
130  } else {
131  for (int32_t i = 1; i < size; ++i)
132  c[i] += c[i - 1];
133  }
134  }
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan(const TAcc &acc, int32_t laneId, T *c, uint32_t i, bool active=true)
Definition: prefixScan.h:40
__host__ __device__ VT uint32_t size
Definition: prefixScan.h:47

◆ chooseDevice()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
alpaka::Dev<TPlatform> const& cms::alpakatools::chooseDevice ( edm::StreamID  id)

Definition at line 16 of file chooseDevice.h.

References cms::Exception::addContext(), and devices().

16  {
18  if (not service->enabled()) {
19  cms::Exception ex("RuntimeError");
20  ex << "Unable to choose current device because " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " is disabled.\n"
21  << "If " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " was not explicitly disabled in the configuration,\n"
22  << "the probable cause is that there is no accelerator or there is some problem\n"
23  << "with the accelerator runtime or drivers.";
24  ex.addContext("Calling cms::alpakatools::chooseDevice()");
25  throw ex;
26  }
27 
28  // For startes we "statically" assign the device based on
29  // edm::Stream number. This is suboptimal if the number of
30  // edm::Streams is not a multiple of the number of devices
31  // (and even then there is no load balancing).
32 
33  // TODO: improve the "assignment" logic
34  auto const& devices = cms::alpakatools::devices<TPlatform>();
35  return devices[id % devices.size()];
36  }
std::vector< alpaka::Dev< TPlatform > > const & devices()
Definition: devices.h:22

◆ devices()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
std::vector<alpaka::Dev<TPlatform> > const& cms::alpakatools::devices ( )
inline

◆ divide_up_by()

constexpr Idx cms::alpakatools::divide_up_by ( Idx  value,
Idx  divisor 
)
inline

◆ dummyReorder()

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::dummyReorder ( const TAcc &  acc,
T const *  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size 
)

Definition at line 17 of file radixSort.h.

18  {}

◆ fillManyFromVector() [1/2]

template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void cms::alpakatools::fillManyFromVector ( Histo *__restrict__  h,
uint32_t  nh,
T const *__restrict__  v,
uint32_t const *__restrict__  offsets,
uint32_t  totSize,
uint32_t  nthreads,
TQueue &  queue 
)

Definition at line 59 of file HistoContainer.h.

References divide_up_by(), h, cms::cuda::nh, cms::cuda::nthreads, cms::cuda::offsets, createBeamHaloJobs::queue, svgfig::template(), cms::cuda::totSize, and cms::cuda::v.

Referenced by SiPixelRecHitSoAFromLegacyT< TrackerTraits >::produce().

65  {
66  Histo::template launchZero<TAcc>(h, queue);
67 
68  const auto threadsPerBlockOrElementsPerThread = nthreads;
69  const auto blocksPerGrid = divide_up_by(totSize, nthreads);
70  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
71 
72  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
73  Histo::template launchFinalize<TAcc>(h, queue);
74 
75  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
76  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
uint32_t T const *__restrict__ uint32_t const *__restrict__ offsets
uint32_t T const *__restrict__ v
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int nthreads
uint32_t nh
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t totSize

◆ fillManyFromVector() [2/2]

template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void cms::alpakatools::fillManyFromVector ( Histo *__restrict__  h,
typename Histo::View  hv,
uint32_t  nh,
T const *__restrict__  v,
uint32_t const *__restrict__  offsets,
uint32_t  totSize,
uint32_t  nthreads,
TQueue &  queue 
)

Definition at line 79 of file HistoContainer.h.

References divide_up_by(), h, cms::cuda::nh, cms::cuda::nthreads, cms::cuda::offsets, createBeamHaloJobs::queue, svgfig::template(), cms::cuda::totSize, and cms::cuda::v.

86  {
87  Histo::template launchZero<TAcc>(hv, queue);
88 
89  const auto threadsPerBlockOrElementsPerThread = nthreads;
90  const auto blocksPerGrid = divide_up_by(totSize, nthreads);
91  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
92 
93  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
94  Histo::template launchFinalize<TAcc>(h, queue);
95 
96  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
97  }
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:20
uint32_t T const *__restrict__ uint32_t const *__restrict__ offsets
uint32_t T const *__restrict__ v
def template(fileName, svg, replaceme="REPLACEME")
Definition: svgfig.py:521
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int nthreads
uint32_t nh
The Signals That Services Can Subscribe To This is based on ActivityRegistry h
Helper function to determine trigger accepts.
Definition: Activities.doc:4
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t totSize

◆ forEachInBins()

template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::forEachInBins ( Hist const &  hist,
value,
int  n,
Func  func 
)

Definition at line 101 of file HistoContainer.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::be, newFWLiteAna::bin, cms::cuda::bs, cms::cuda::func, compareTotals::hist, SiStripPI::max, SiStripPI::min, cms::cuda::n, and LaserClient_cfi::nbins.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN::operator()().

101  {
102  int bs = Hist::bin(value);
103  int be = std::min(int(Hist::nbins() - 1), bs + n);
104  bs = std::max(0, bs - n);
105  ALPAKA_ASSERT_ACC(be >= bs);
106  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
107  func(*pj);
108  }
109  }
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
Definition: value.py:1
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int n

◆ forEachInWindow()

template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::forEachInWindow ( Hist const &  hist,
wmin,
wmax,
Func const &  func 
)

Definition at line 113 of file HistoContainer.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::be, newFWLiteAna::bin, cms::cuda::bs, cms::cuda::func, compareTotals::hist, cms::cuda::wmax, and cms::cuda::wmin.

113  {
114  auto bs = Hist::bin(wmin);
115  auto be = Hist::bin(wmax);
116  ALPAKA_ASSERT_ACC(be >= bs);
117  for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
118  func(*pj);
119  }
120  }
uint32_t T const *__restrict__ uint32_t const *__restrict__ int32_t int Histo::index_type cudaStream_t Func __host__ __device__ V int Func func
__host__ __device__ V wmin
__host__ __device__ V V wmax

◆ getDeviceCachingAllocator()

template<typename TDev , typename TQueue , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
CachingAllocator<TDev, TQueue>& cms::alpakatools::getDeviceCachingAllocator ( TDev const &  device)
inline

Definition at line 78 of file getDeviceCachingAllocator.h.

References cms::cuda::assert(), CMS_THREAD_SAFE, and devices().

78  {
79  // initialise all allocators, one per device
80  CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>();
81 
82  size_t const index = alpaka::getNativeHandle(device);
83  assert(index < cms::alpakatools::devices<alpaka::Platform<TDev>>().size());
84 
85  // the public interface is thread safe
86  return allocators[index];
87  }
size
Write out results.
assert(be >=bs)
#define CMS_THREAD_SAFE
std::vector< alpaka::Dev< TPlatform > > const & devices()
Definition: devices.h:22

◆ getEventCache()

template<typename Event >
EventCache<Event>& cms::alpakatools::getEventCache ( )

Definition at line 92 of file EventCache.h.

References utilities::cache(), and CMS_THREAD_SAFE.

92  {
93  // the public interface is thread safe
94  CMS_THREAD_SAFE static EventCache<Event> cache;
95  return cache;
96  }
#define CMS_THREAD_SAFE
def cache(function)
Definition: utilities.py:3

◆ getHostCachingAllocator()

template<typename TQueue , typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
CachingAllocator<alpaka_common::DevHost, TQueue>& cms::alpakatools::getHostCachingAllocator ( )
inline

Definition at line 16 of file getHostCachingAllocator.h.

References cms::alpakatools::config::binGrowth, CMS_THREAD_SAFE, host(), cms::alpakatools::config::maxBin, cms::alpakatools::config::maxCachedBytes, cms::alpakatools::config::maxCachedFraction, and cms::alpakatools::config::minBin.

16  {
17  // thread safe initialisation of the host allocator
18  CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
19  host(),
25  false, // reuseSameQueueAllocations
26  false); // debug
27 
28  // the public interface is thread safe
29  return allocator;
30  }
string host
Definition: query.py:115
constexpr unsigned int minBin
constexpr unsigned int maxBin
constexpr double maxCachedFraction
#define CMS_THREAD_SAFE
constexpr size_t maxCachedBytes
constexpr unsigned int binGrowth

◆ getQueueCache()

template<typename Queue >
QueueCache<Queue>& cms::alpakatools::getQueueCache ( )

Definition at line 65 of file QueueCache.h.

References utilities::cache(), and CMS_THREAD_SAFE.

65  {
66  // the public interface is thread safe
67  CMS_THREAD_SAFE static QueueCache<Queue> cache;
68  return cache;
69  }
#define CMS_THREAD_SAFE
def cache(function)
Definition: utilities.py:3

◆ host()

alpaka::DevCpu const& cms::alpakatools::host ( )
inline

◆ host_platform()

alpaka::PlatformCpu const& cms::alpakatools::host_platform ( )
inline

Definition at line 11 of file host.h.

11 { return platform<alpaka::PlatformCpu>(); }

◆ independent_group_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements ( TAcc const &  acc,
TArgs...  args 
)
inline

◆ independent_group_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_along ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1343 of file workdivision.h.

References writedatasetfile::args.

1343  {
1344  return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
1345  }
uint32_t Idx
Definition: config.h:14

◆ independent_group_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_x ( TAcc const &  acc,
TArgs...  args 
)
inline

◆ independent_group_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_y ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1363 of file workdivision.h.

References writedatasetfile::args.

◆ independent_group_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_z ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1370 of file workdivision.h.

References writedatasetfile::args.

◆ independent_groups()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto cms::alpakatools::independent_groups ( TAcc const &  acc,
TArgs...  args 
)
inline

◆ independent_groups_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_along ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1184 of file workdivision.h.

References writedatasetfile::args.

1184  {
1185  return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
1186  }
uint32_t Idx
Definition: config.h:14

◆ independent_groups_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_x ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1197 of file workdivision.h.

References writedatasetfile::args.

◆ independent_groups_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_y ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1204 of file workdivision.h.

References writedatasetfile::args.

◆ independent_groups_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_z ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1211 of file workdivision.h.

References writedatasetfile::args.

◆ isPowerOf2()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
constexpr bool cms::alpakatools::isPowerOf2 ( T  v)

Definition at line 11 of file prefixScan.h.

References findQualityFiles::v.

Referenced by blockPrefixScan().

11  {
 12  // returns true iff v has only one bit set.
13  while (v) {
14  if (v & 1)
15  return !(v >> 1);
16  else
17  v >>= 1;
18  }
19  return false;
20  }

◆ make_device_buffer() [1/6]

template<typename T , typename TDev >
std::enable_if_t<alpaka::isDevice<TDev> and not std::is_array_v<T>, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer ( TDev const &  device)

Definition at line 185 of file memory.h.

186  {
187  return alpaka::allocBuf<T, Idx>(device, Scalar{});
188  }
Vec< Dim0D > Scalar
Definition: config.h:28

◆ make_device_buffer() [2/6]

template<typename T , typename TDev >
std::enable_if_t<alpaka::isDevice<TDev> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer ( TDev const &  device,
Extent  extent 
)

Definition at line 194 of file memory.h.

194  {
195  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(device, Vec1D{extent});
196  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_device_buffer() [3/6]

template<typename T , typename TDev >
std::enable_if_t<alpaka::isDevice<TDev> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer ( TDev const &  device)

Definition at line 202 of file memory.h.

202  {
203  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(device, Vec1D{std::extent_v<T>});
204  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_device_buffer() [4/6]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and not std::is_array_v<T>, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer ( TQueue const &  queue)

Definition at line 210 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

210  {
211  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
212  return allocCachedBuf<T, Idx>(alpaka::getDev(queue), queue, Scalar{});
213  }
214  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
215  return alpaka::allocAsyncBuf<T, Idx>(queue, Scalar{});
216  }
217  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
218  return alpaka::allocBuf<T, Idx>(alpaka::getDev(queue), Scalar{});
219  }
220  }
Vec< Dim0D > Scalar
Definition: config.h:28
constexpr AllocatorPolicy allocator_policy

◆ make_device_buffer() [5/6]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer ( TQueue const &  queue,
Extent  extent 
)

Definition at line 226 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

226  {
227  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
228  return allocCachedBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), queue, Vec1D{extent});
229  }
230  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
231  return alpaka::allocAsyncBuf<std::remove_extent_t<T>, Idx>(queue, Vec1D{extent});
232  }
233  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
234  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), Vec1D{extent});
235  }
236  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
constexpr AllocatorPolicy allocator_policy

◆ make_device_buffer() [6/6]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer ( TQueue const &  queue)

Definition at line 242 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

242  {
243  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
244  return allocCachedBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), queue, Vec1D{std::extent_v<T>});
245  }
246  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
247  return alpaka::allocAsyncBuf<std::remove_extent_t<T>, Idx>(queue, Vec1D{std::extent_v<T>});
248  }
249  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
250  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), Vec1D{std::extent_v<T>});
251  }
252  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
constexpr AllocatorPolicy allocator_policy

◆ make_device_view() [1/4]

template<typename T , typename TDev >
std::enable_if_t<not std::is_array_v<T>, device_view<TDev, T> > cms::alpakatools::make_device_view ( TDev const &  device,
T data 
)

◆ make_device_view() [2/4]

template<typename T , typename TDev >
device_view<TDev, T[]> cms::alpakatools::make_device_view ( TDev const &  device,
T data,
Extent  extent 
)

Definition at line 265 of file memory.h.

References data.

265  {
266  return alpaka::ViewPlainPtr<TDev, T, Dim1D, Idx>(data, device, Vec1D{extent});
267  }
Vec< Dim1D > Vec1D
Definition: config.h:25
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_device_view() [3/4]

template<typename T , typename TDev >
std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_view<TDev, T> > cms::alpakatools::make_device_view ( TDev const &  device,
T data,
Extent  extent 
)

Definition at line 271 of file memory.h.

References data.

271  {
272  return alpaka::ViewPlainPtr<TDev, std::remove_extent_t<T>, Dim1D, Idx>(data, device, Vec1D{extent});
273  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
alpaka::DimInt< 1u > Dim1D
Definition: config.h:19
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_device_view() [4/4]

template<typename T , typename TDev >
std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_view<TDev, T> > cms::alpakatools::make_device_view ( TDev const &  device,
T data 
)

Definition at line 277 of file memory.h.

References data.

277  {
278  return alpaka::ViewPlainPtr<TDev, std::remove_extent_t<T>, Dim1D, Idx>(data, device, Vec1D{std::extent_v<T>});
279  }
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
alpaka::DimInt< 1u > Dim1D
Definition: config.h:19
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_host_buffer() [1/9]

template<typename T >
std::enable_if_t<not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 65 of file memory.h.

References host().

65  {
66  return alpaka::allocBuf<T, Idx>(host(), Scalar{});
67  }
string host
Definition: query.py:115
Vec< Dim0D > Scalar
Definition: config.h:28

◆ make_host_buffer() [2/9]

template<typename T >
std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( Extent  extent)

Definition at line 71 of file memory.h.

References host().

71  {
72  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(host(), Vec1D{extent});
73  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_host_buffer() [3/9]

template<typename T >
std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 77 of file memory.h.

References host().

77  {
78  return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(host(), Vec1D{std::extent_v<T>});
79  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_host_buffer() [4/9]

template<typename T , typename TPlatform >
std::enable_if_t<not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 85 of file memory.h.

References host().

85  {
86  using Platform = TPlatform;
87  return alpaka::allocMappedBuf<Platform, T, Idx>(host(), platform<Platform>(), Scalar{});
88  }
string host
Definition: query.py:115
Vec< Dim0D > Scalar
Definition: config.h:28

◆ make_host_buffer() [5/9]

template<typename T , typename TPlatform >
std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( Extent  extent)

Definition at line 92 of file memory.h.

References host().

92  {
93  using Platform = TPlatform;
94  return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(host(), platform<Platform>(), Vec1D{extent});
95  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_host_buffer() [6/9]

template<typename T , typename TPlatform >
std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 99 of file memory.h.

References host().

99  {
100  using Platform = TPlatform;
101  return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
102  host(), platform<Platform>(), Vec1D{std::extent_v<T>});
103  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14

◆ make_host_buffer() [7/9]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( TQueue const &  queue)

Definition at line 109 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

110  {
111  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
112  return allocCachedBuf<T, Idx>(host(), queue, Scalar{});
113  } else {
114  using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
115  return alpaka::allocMappedBuf<Platform, T, Idx>(host(), platform<Platform>(), Scalar{});
116  }
117  }
string host
Definition: query.py:115
Vec< Dim0D > Scalar
Definition: config.h:28
constexpr AllocatorPolicy allocator_policy

◆ make_host_buffer() [8/9]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( TQueue const &  queue,
Extent  extent 
)

Definition at line 123 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

123  {
124  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
125  return allocCachedBuf<std::remove_extent_t<T>, Idx>(host(), queue, Vec1D{extent});
126  } else {
127  using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
128  return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
129  host(), platform<Platform>(), Vec1D{extent});
130  }
131  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
constexpr AllocatorPolicy allocator_policy

◆ make_host_buffer() [9/9]

template<typename T , typename TQueue >
std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( TQueue const &  queue)

Definition at line 137 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

137  {
138  if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
139  return allocCachedBuf<std::remove_extent_t<T>, Idx>(host(), queue, Vec1D{std::extent_v<T>});
140  } else {
141  using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
142  return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
143  host(), platform<Platform>(), Vec1D{std::extent_v<T>});
144  }
145  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
constexpr AllocatorPolicy allocator_policy

◆ make_host_view() [1/4]

template<typename T >
std::enable_if_t<not std::is_array_v<T>, host_view<T> > cms::alpakatools::make_host_view ( T data)

◆ make_host_view() [2/4]

template<typename T >
host_view<T[]> cms::alpakatools::make_host_view ( T data,
Extent  extent 
)

Definition at line 158 of file memory.h.

References data, and host().

158  {
159  return alpaka::ViewPlainPtr<DevHost, T, Dim1D, Idx>(data, host(), Vec1D{extent});
160  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_host_view() [3/4]

template<typename T >
std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_view<T> > cms::alpakatools::make_host_view ( T data,
Extent  extent 
)

Definition at line 164 of file memory.h.

References data, and host().

164  {
165  return alpaka::ViewPlainPtr<DevHost, std::remove_extent_t<T>, Dim1D, Idx>(data, host(), Vec1D{extent});
166  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
alpaka::DimInt< 1u > Dim1D
Definition: config.h:19
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_host_view() [4/4]

template<typename T >
std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_view<T> > cms::alpakatools::make_host_view ( T data)

Definition at line 170 of file memory.h.

References data, and host().

170  {
171  return alpaka::ViewPlainPtr<DevHost, std::remove_extent_t<T>, Dim1D, Idx>(data, host(), Vec1D{std::extent_v<T>});
172  }
string host
Definition: query.py:115
Vec< Dim1D > Vec1D
Definition: config.h:25
uint32_t Idx
Definition: config.h:14
alpaka::DimInt< 1u > Dim1D
Definition: config.h:19
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80

◆ make_SimpleVector() [1/2]

template<class T >
SimpleVector<T> cms::alpakatools::make_SimpleVector ( int  capacity,
T data 
)

Definition at line 126 of file SimpleVector.h.

References gpuVertexFinder::capacity(), data, and runTheMatrix::ret.

126  {
127  SimpleVector<T> ret;
128  ret.construct(capacity, data);
129  return ret;
130  }
ret
prodAgent to be discontinued
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
size d for d tracks hist hist capacity()

◆ make_SimpleVector() [2/2]

template<class T >
SimpleVector<T>* cms::alpakatools::make_SimpleVector ( SimpleVector< T > *  mem,
int  capacity,
T data 
)

Definition at line 134 of file SimpleVector.h.

References gpuVertexFinder::capacity(), data, mem, and runTheMatrix::ret.

134  {
135  auto ret = new (mem) SimpleVector<T>();
136  ret->construct(capacity, data);
137  return ret;
138  }
ret
prodAgent to be discontinued
uint16_t mem[nChs][nEvts]
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
size d for d tracks hist hist capacity()

◆ make_workdiv() [1/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
WorkDiv<Dim1D> cms::alpakatools::make_workdiv ( Idx  blocks,
Idx  elements 
)
inline

Definition at line 47 of file workdivision.h.

References gather_cfg::blocks, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and bookConverter::elements.

47  {
48  if constexpr (not requires_single_thread_per_block_v<TAcc>) {
49  // On GPU backends, each thread is looking at a single element:
50  // - the number of threads per block is "elements";
51  // - the number of elements per thread is always 1.
52  return WorkDiv<Dim1D>(blocks, elements, Idx{1});
53  } else {
54  // On CPU backends, run serially with a single thread per block:
55  // - the number of threads per block is always 1;
56  // - the number of elements per thread is "elements".
57  return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
58  }
59  }
uint32_t Idx
Definition: config.h:14
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
Definition: config.h:31

◆ make_workdiv() [2/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
WorkDiv<alpaka::Dim<TAcc> > cms::alpakatools::make_workdiv ( const Vec< alpaka::Dim< TAcc >> &  blocks,
const Vec< alpaka::Dim< TAcc >> &  elements 
)
inline

Definition at line 63 of file workdivision.h.

References gather_cfg::blocks, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and bookConverter::elements.

64  {
65  using Dim = alpaka::Dim<TAcc>;
66  if constexpr (not requires_single_thread_per_block_v<TAcc>) {
67  // On GPU backends, each thread is looking at a single element:
68  // - the number of threads per block is "elements";
69  // - the number of elements per thread is always 1.
71  } else {
72  // On CPU backends, run serially with a single thread per block:
73  // - the number of threads per block is always 1;
74  // - the number of elements per thread is "elements".
76  }
77  }
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
Definition: config.h:31
alpaka::Vec< TDim, Idx > Vec
Definition: config.h:24

◆ module_backend_config()

void cms::alpakatools::module_backend_config ( edm::ConfigurationDescriptions iDesc)

Definition at line 13 of file module_backend_config.cc.

References edm::ParameterSetDescription::addUntracked(), edm::ConfigurationDescriptions::defaultDescription(), edm::ParameterSetDescription::isLabelUnused(), kComment, AlCaHLTBitMon_QueryRunRegistry::string, and findQualityFiles::v.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ESProducer::prevalidate(), and ALPAKA_ACCELERATOR_NAMESPACE::ProducerBase< BaseT, Args >::prevalidate().

13  {
14  // the code below leads to 'alpaka = untracked.PSet(backend = untracked.string)' to be added to the generated cfi files
15  // TODO: I don't know if this is a desired behavior for HLT
17  descAlpaka.addUntracked<std::string>("backend", "")
18  ->setComment(
19  "Alpaka backend for this module. Can be empty string (for the global default), 'serial_sync', or "
20  " - depending on the architecture and available hardware - 'cuda_async', 'rocm_async'");
21 
22  if (iDesc.defaultDescription()) {
23  if (iDesc.defaultDescription()->isLabelUnused(kPSetName)) {
24  iDesc.defaultDescription()
25  ->addUntracked<edm::ParameterSetDescription>(kPSetName, descAlpaka)
26  ->setComment(kComment);
27  }
28  }
29  for (auto& v : iDesc) {
30  if (v.second.isLabelUnused(kPSetName)) {
31  v.second.addUntracked<edm::ParameterSetDescription>(kPSetName, descAlpaka)->setComment(kComment);
32  }
33  }
34  }
bool isLabelUnused(std::string const &label) const
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
ParameterSetDescription * defaultDescription()
Returns 0 if no default has been assigned.
static const char *const kComment

◆ once_per_block()

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool cms::alpakatools::once_per_block ( TAcc const &  acc)
inline

◆ once_per_grid()

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool cms::alpakatools::once_per_grid ( TAcc const &  acc)
inline

Definition at line 1382 of file workdivision.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernel::operator()(), pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::InitDoublets< TrackerTraits >::operator()(), calibPixel::CalibDigis< debug >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_BLFastFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_FastFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_checkOverflows< TrackerTraits >::operator()(), calibPixel::CalibDigisPhase2::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::CountModules< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoStructKernel::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdate::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdateMulti2::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdateMulti3::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::RawToDigi_kernel< debug >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_connect< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_find_ntuplets< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fillNLayers< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::SeedingTopoThresh::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::PrepareTopoInputs::operator()().

1382  {
1383  return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) == Vec<alpaka::Dim<TAcc>>::zeros();
1384  }
alpaka::Vec< TDim, Idx > Vec
Definition: config.h:24

◆ platform()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
TPlatform const& cms::alpakatools::platform ( )
inline

Definition at line 14 of file devices.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::AlpakaService::AlpakaService().

14  {
15  // initialise the platform the first time that this function is called
16  static const auto platform = TPlatform{};
17  return platform;
18  }
TPlatform const & platform()
Definition: devices.h:14

◆ radixSort() [1/2]

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< std::is_unsigned< T >::value &&!requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSort ( const TAcc &  acc,
T const *  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size 
)

Definition at line 324 of file radixSort.h.

References a.

325  {
326  radixSortImpl<TAcc, T, NS>(acc, a, ind, ind2, size, dummyReorder<TAcc, T>);
327  }
size
Write out results.
double a
Definition: hdecay.h:121

◆ radixSort() [2/2]

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_INLINE void cms::alpakatools::radixSort ( const TAcc &  acc,
T const *  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size 
)

Definition at line 357 of file radixSort.h.

References a, and testProducerWithPsetDescEmpty_cfi::i1.

357  {
 358  static_assert(requires_single_thread_per_block_v<TAcc>, "CPU sort (not a radixSort) called with wrong accelerator");
359  // Initialize the index array
360  std::iota(ind, ind + size, 0);
361  /*
362  printf("std::stable_sort(a=%p, ind=%p, indmax=%p, size=%d)\n", a, ind, ind + size, size);
363  for (uint32_t i=0; i<10 && i<size; i++) {
364  printf ("a[%d]=%ld ", i, (long int)a[i]);
365  }
366  printf("\n");
367  for (uint32_t i=0; i<10 && i<size; i++) {
368  printf ("ind[%d]=%d ", i, ind[i]);
369  }
370  printf("\n");
371  */
372  std::stable_sort(ind, ind + size, [a](uint16_t i0, uint16_t i1) { return a[i0] < a[i1]; });
373  /*
374  for (uint32_t i=0; i<10 && i<size; i++) {
375  printf ("ind[%d]=%d ", i, ind[i]);
376  }
377  printf("\n");
378  */
379  }
size
Write out results.
double a
Definition: hdecay.h:121

◆ radixSortImpl()

template<typename TAcc , typename T , int NS, typename RF >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSortImpl ( const TAcc &  acc,
T const *__restrict__  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size,
RF  reorder 
)

Definition at line 97 of file radixSort.h.

References a, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::assert(), cms::cudacompat::atomicAdd(), cms::cudacompat::atomicMax(), newFWLiteAna::bin, HltBtagPostValidation_cff::c, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), mps_fire::i, heavyIonCSV_trainingSettings::idx, independent_group_elements(), createfilelist::int, dqmiolumiharvest::j, dqmdumpme::k, hltrates_dqm_sourceclient-live_cfg::offset, trackingPlots::reorder, contentValuesCheck::ss, submitPVValidationJobs::t, ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::threadIdxLocal(), dqmMemoryStats::total, and x.

98  {
99  if constexpr (!requires_single_thread_per_block_v<TAcc>) {
100  const auto warpSize = alpaka::warp::getSize(acc);
101  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
102  [[maybe_unused]] const uint32_t blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
103  // we expect a power of 2 here
104  assert(warpSize && (0 == (warpSize & (warpSize - 1))));
105  const std::size_t warpMask = warpSize - 1;
106 
107  // Define the bin size (d=8 => 1 byte bin).
108  constexpr int binBits = 8, dataBits = 8 * sizeof(T), totalSortingPassses = dataBits / binBits;
109  // Make sure the slices are data aligned
110  static_assert(0 == dataBits % binBits);
111  // Make sure the NS parameter makes sense
112  static_assert(NS > 0 && NS <= sizeof(T));
113  constexpr int binsNumber = 1 << binBits;
114  constexpr int binsMask = binsNumber - 1;
115  // Prefix scan iterations. NS is counted in full bytes and not slices.
116  constexpr int initialSortingPass = int(sizeof(T)) - NS;
117 
118  // Count/index for the prefix scan
119  // TODO: rename
120  auto& c = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
121  // Temporary storage for prefix scan. Only really needed for first-of-warp keeping
122  // Then used for thread to bin mapping TODO: change type to byte and remap to
123  auto& ct = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
124  // Bin to thread index mapping (used to store the highest thread index within a bin number
125  // batch of threads.
126  // TODO: currently initialized to an invalid value, but could also be initialized to the
127  // lowest possible value (change to bytes?)
128  auto& cu = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
129  // TODO we could also have an explicit caching of the current index for each thread.
130 
131  // TODO: do those have to be shared?
132  auto& ibs = alpaka::declareSharedVar<int, __COUNTER__>(acc);
133  auto& currentSortingPass = alpaka::declareSharedVar<int, __COUNTER__>(acc);
134 
135  ALPAKA_ASSERT_ACC(size > 0);
136  // TODO: is this a hard requirement?
137  ALPAKA_ASSERT_ACC(blockDimension >= binsNumber);
138 
139  currentSortingPass = initialSortingPass;
140 
141  auto j = ind;
142  auto k = ind2;
143 
 144  // Initialize the index order to a trivial increment.
145  for (auto idx : independent_group_elements(acc, size)) {
146  j[idx] = idx;
147  }
148  alpaka::syncBlockThreads(acc);
149 
150  // Iterate on the slices of the data.
151  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, (currentSortingPass < totalSortingPassses))) {
152  for (auto idx : independent_group_elements(acc, binsNumber)) {
153  c[idx] = 0;
154  }
155  alpaka::syncBlockThreads(acc);
156  const auto sortingPassShift = binBits * currentSortingPass;
157 
158  // fill bins (count elements in each bin)
159  for (auto idx : independent_group_elements(acc, size)) {
160  auto bin = (a[j[idx]] >> sortingPassShift) & binsMask;
161  alpaka::atomicAdd(acc, &c[bin], 1, alpaka::hierarchy::Threads{});
162  }
163  alpaka::syncBlockThreads(acc);
164 
165  if (!threadIdxLocal && 1 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]) {
166  // printf("Pass=%d, Block=%d, ", currentSortingPass - 1, alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]);
167  size_t total = 0;
168  for (int i = 0; i < (int)binsNumber; i++) {
169  // printf("count[%d]=%d ", i, c[i] );
170  total += c[i];
171  }
172  // printf("total=%zu\n", total);
173  assert(total == size);
174  }
175  // prefix scan "optimized"???...
176  // TODO: we might be able to reuse the warpPrefixScan function
177  // Warp level prefix scan
178  for (auto idx : independent_group_elements(acc, binsNumber)) {
179  auto x = c[idx];
180  auto laneId = idx & warpMask;
181 
182  for (int offset = 1; offset < warpSize; offset <<= 1) {
183  auto y = alpaka::warp::shfl(acc, x, laneId - offset);
184  if (laneId >= (uint32_t)offset)
185  x += y;
186  }
187  ct[idx] = x;
188  }
189  alpaka::syncBlockThreads(acc);
190 
191  // Block level completion of prefix scan (add last sum of each preceding warp)
192  for (auto idx : independent_group_elements(acc, binsNumber)) {
193  auto ss = (idx / warpSize) * warpSize - 1;
194  c[idx] = ct[idx];
195  for (int i = ss; i > 0; i -= warpSize)
196  c[idx] += ct[i];
197  }
198  // Post prefix scan, c[bin] contains the offsets in index counts to the last index +1 for each bin
199 
200  /*
201  //prefix scan for the nulls (for documentation)
202  if (threadIdxLocal==0)
203  for (int i = 1; i < sb; ++i) c[i] += c[i-1];
204  */
205 
206  // broadcast: we will fill the new index array downward, from offset c[bin], with one thread per
207  // bin, working on one set of bin size elements at a time.
208  // This will reorder the indices by the currently considered slice, otherwise preserving the previous order.
209  ibs = size - 1;
210  alpaka::syncBlockThreads(acc);
211 
212  // Iterate on bin-sized slices to (size - 1) / binSize + 1 iterations
213  while (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, ibs >= 0)) {
214  // Init
215  for (auto idx : independent_group_elements(acc, binsNumber)) {
216  cu[idx] = -1;
217  ct[idx] = -1;
218  }
219  alpaka::syncBlockThreads(acc);
220 
221  // Find the highest index for all the threads dealing with a given bin (in cu[])
222  // Also record the bin for each thread (in ct[])
223  for (auto idx : independent_group_elements(acc, binsNumber)) {
224  int i = ibs - idx;
225  int32_t bin = -1;
226  if (i >= 0) {
227  bin = (a[j[i]] >> sortingPassShift) & binsMask;
228  ct[idx] = bin;
229  alpaka::atomicMax(acc, &cu[bin], int(i), alpaka::hierarchy::Threads{});
230  }
231  }
232  alpaka::syncBlockThreads(acc);
233 
234  // FIXME: we can slash a memory access.
235  for (auto idx : independent_group_elements(acc, binsNumber)) {
236  int i = ibs - idx;
 237  // Are we still inside the data?
238  if (i >= 0) {
239  int32_t bin = ct[idx];
240  // Are we the thread with the highest index (from previous pass)?
241  if (cu[bin] == i) {
242  // With the highest index, we are actually the lowest thread number. We will
243  // work "on behalf of" the higher thread numbers (including ourselves)
244  // No way around scanning and testing for bin in ct[otherThread] number to find the other threads
245  for (int peerThreadIdx = idx; peerThreadIdx < binsNumber; peerThreadIdx++) {
246  if (ct[peerThreadIdx] == bin) {
247  k[--c[bin]] = j[ibs - peerThreadIdx];
248  }
249  }
250  }
251  }
252  /*
253  int32_t bin = (i >= 0 ? ((a[j[i]] >> sortingPassShift) & binsMask) : -1);
254  if (i >= 0 && i == cu[bin]) // ensure to keep them in order: only one thread per bin is active, rest is idle.
255  //
256  for (int ii = idx; ii < sb; ++ii)
257  if (ct[ii] == bin) {
258  auto oi = ii - idx;
259  // assert(i>=oi);if(i>=oi)
260  k[--c[bin]] = j[i - oi]; // i = ibs - idx, oi = ii - idx => i - oi = ibs - ii;
261  }
262  */
263  }
264  alpaka::syncBlockThreads(acc);
265 
266  if (threadIdxLocal == 0) {
267  ibs -= binsNumber;
268  // https://github.com/cms-patatrack/pixeltrack-standalone/pull/210
269  // TODO: is this really needed?
270  alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
271  }
272  alpaka::syncBlockThreads(acc);
273  }
274 
275  /*
276  // broadcast for the nulls (for documentation)
277  if (threadIdxLocal==0)
278  for (int i=size-first-1; i>=0; i--) { // =blockDim.x) {
279  auto bin = (a[j[i]] >> d*p)&(sb-1);
280  auto ik = atomicSub(&c[bin],1);
281  k[ik-1] = j[i];
282  }
283  */
284 
285  alpaka::syncBlockThreads(acc);
286  ALPAKA_ASSERT_ACC(c[0] == 0);
287 
288  // swap (local, ok)
289  auto t = j;
290  j = k;
291  k = t;
292 
293  const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
294  if (threadIdxLocal == 0)
295  ++currentSortingPass;
296  alpaka::syncBlockThreads(acc);
297  }
298 
299  if ((dataBits != 8) && (0 == (NS & 1)))
301  ind); // dataBits/binBits is even so ind is correct (the result is in the right location)
302 
303  // TODO this copy is (doubly?) redundant with the reorder
304  if (j != ind) // odd number of sorting passes, we need to move the result to the right array (ind[])
305  for (auto idx : independent_group_elements(acc, size)) {
306  ind[idx] = ind2[idx];
307  };
308 
309  alpaka::syncBlockThreads(acc);
310 
311  // now move negative first... (if signed)
 312  // TODO: the ind2 => ind copy should have been deferred. We should pass (j != ind) as an extra parameter
313  reorder(acc, a, ind, ind2, size);
314  } else {
315  //static_assert(false);
316  }
317  }
size
Write out results.
T1 atomicMax(T1 *a, T2 b)
Definition: cudaCompat.h:97
const uint32_t threadIdxLocal(alpaka::getIdx< alpaka::Block, alpaka::Threads >(acc)[0u])
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
assert(be >=bs)
double a
Definition: hdecay.h:121
float x
long double T
T1 atomicAdd(T1 *a, T2 b)
Definition: cudaCompat.h:61

◆ radixSortMulti()

template<typename TAcc , typename T , int NS = sizeof(T)>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSortMulti ( const TAcc &  acc,
T const *  v,
uint16_t *  index,
uint32_t const *  offsets,
uint16_t *  workspace 
)

Definition at line 382 of file radixSort.h.

References a, cms::cuda::assert(), cms::cudacompat::blockIdx, unpackBuffers-CaloStage1::offsets, findQualityFiles::v, and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

383  {
384  // TODO: check
 385  // Sort multiple blocks of data in v[], separated into chunks located at offsets[]
386  // extern __shared__ uint16_t ws[];
387  uint16_t* ws = alpaka::getDynSharedMem<uint16_t>(acc);
388 
389  const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
390  auto a = v + offsets[blockIdx];
391  auto ind = index + offsets[blockIdx];
392  auto ind2 = nullptr == workspace ? ws : workspace + offsets[blockIdx];
393  auto size = offsets[blockIdx + 1] - offsets[blockIdx];
395  if (size > 0)
396  radixSort<TAcc, T, NS>(acc, a, ind, ind2, size);
397  }
size
Write out results.
assert(be >=bs)
const dim3 blockIdx
Definition: cudaCompat.h:32
double a
Definition: hdecay.h:121

◆ reorderFloat()

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::reorderFloat ( const TAcc &  acc,
T const *  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size 
)

Definition at line 54 of file radixSort.h.

References a, heavyIonCSV_trainingSettings::idx, and independent_group_elements().

55  {
56  //move negative first...
57 
58  auto& firstNeg = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
59  firstNeg = a[ind[0]] < 0 ? 0 : size;
60  alpaka::syncBlockThreads(acc);
61 
62  // find first negative
63  for (auto idx : independent_group_elements(acc, size - 1)) {
64  if ((a[ind[idx]] ^ a[ind[idx + 1]]) < 0)
65  firstNeg = idx + 1;
66  }
67  alpaka::syncBlockThreads(acc);
68 
69  for (auto idx : independent_group_elements(acc, firstNeg, size)) {
70  ind2[size - idx - 1] = ind[idx];
71  }
72  alpaka::syncBlockThreads(acc);
73 
74  for (auto idx : independent_group_elements(acc, firstNeg)) {
75  ind2[idx + size - firstNeg] = ind[idx];
76  }
77  alpaka::syncBlockThreads(acc);
78 
79  for (auto idx : independent_group_elements(acc, size)) {
80  ind[idx] = ind2[idx];
81  }
82  }
size
Write out results.
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
double a
Definition: hdecay.h:121

◆ reorderSigned()

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::reorderSigned ( const TAcc &  acc,
T const *  a,
uint16_t *  ind,
uint16_t *  ind2,
uint32_t  size 
)

Definition at line 21 of file radixSort.h.

References a, heavyIonCSV_trainingSettings::idx, and independent_group_elements().

22  {
23  //move negative first...
24 
25  auto& firstNeg = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
26  firstNeg = a[ind[0]] < 0 ? 0 : size;
27  alpaka::syncBlockThreads(acc);
28 
29  // find first negative
30  for (auto idx : independent_group_elements(acc, size - 1)) {
31  if ((a[ind[idx]] ^ a[ind[idx + 1]]) < 0) {
32  firstNeg = idx + 1;
33  }
34  }
35 
36  alpaka::syncBlockThreads(acc);
37 
38  for (auto idx : independent_group_elements(acc, firstNeg, size)) {
39  ind2[idx - firstNeg] = ind[idx];
40  }
41  alpaka::syncBlockThreads(acc);
42 
43  for (auto idx : independent_group_elements(acc, firstNeg)) {
44  ind2[idx + size - firstNeg] = ind[idx];
45  }
46  alpaka::syncBlockThreads(acc);
47 
48  for (auto idx : independent_group_elements(acc, size)) {
49  ind[idx] = ind2[idx];
50  }
51  }
size
Write out results.
ALPAKA_FN_ACC auto independent_group_elements(TAcc const &acc, TArgs... args)
double a
Definition: hdecay.h:121

◆ round_up_by()

constexpr Idx cms::alpakatools::round_up_by ( Idx  value,
Idx  divisor 
)
inline

Definition at line 17 of file workdivision.h.

17 { return (value + divisor - 1) / divisor * divisor; }
Definition: value.py:1

◆ toBackend()

Backend cms::alpakatools::toBackend ( std::string_view  name)

Definition at line 13 of file Backend.cc.

References cms::Exception::addContext(), HLT_2024v12_cff::distance, spr::find(), newFWLiteAna::found, and Skims_PA_cff::name.

13  {
14  auto found = std::find(backendNames.begin(), backendNames.end(), name);
15  if (found == backendNames.end()) {
16  cms::Exception ex("EnumNotFound");
17  ex << "Invalid backend name '" << name << "'";
18  ex.addContext("Calling cms::alpakatools::toBackend()");
19  throw ex;
20  }
21  return static_cast<Backend>(std::distance(backendNames.begin(), found));
22  }
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:19

◆ toString()

std::string_view cms::alpakatools::toString ( Backend  backend)

Definition at line 24 of file Backend.cc.

References cms::Exception::addContext(), HLT_2024v12_cff::backend, size, and heppy_batch::val.

Referenced by TestAlpakaObjectAnalyzer::analyze(), and TestAlpakaAnalyzer::analyze().

24  {
25  auto val = static_cast<unsigned short>(backend);
26  if (val >= static_cast<unsigned short>(Backend::size)) {
27  cms::Exception ex("InvalidEnumValue");
28  ex << "Invalid backend enum value " << val;
29  ex.addContext("Calling cms::alpakatools::toString()");
30  throw ex;
31  }
32  return backendNames[val];
33  }
size
Write out results.

◆ uniform_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 311 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::test::add_vectors_d(), ALPAKA_ACCELERATOR_NAMESPACE::test::add_vectors_f(), ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::for(), ALPAKA_ACCELERATOR_NAMESPACE::PFRecHitProducerKernelConstruct< CAL >::operator()(), cms::alpakatools::countFromVector::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernel::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::InitDoublets< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN::operator()(), calibPixel::CalibDigis< debug >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_BLFastFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_FastFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::setHitsLayerStart< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::LoadTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_nullhypot::operator()(), cms::alpakatools::fillFromVector::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoMultiKernel2::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_minimize::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoMultiKernel3::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_checkOverflows< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ECLCCInit::operator()(), calibPixel::CalibDigisPhase2::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::CountModules< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_CircleFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ECLCCCompute1::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::PFRecHitProducerKernelTopology< CAL >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_LineFit< N, TrackerTraits >::operator()(), 
ALPAKA_ACCELERATOR_NAMESPACE::ECLCCFlatten::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdate::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fishboneCleaner< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::Kernel_BLFit< N, TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdateMulti2::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_earlyDuplicateRemover< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::TestAlgoKernelUpdateMulti3::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fastDuplicateRemover< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::RawToDigi_kernel< debug >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_find_ntuplets< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_mark_used< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_countMultiplicity< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fillMultiplicity< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_classifyTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_findamplchi2_and_finish::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_doStatsForTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_countHitInTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fillHitInTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fillHitDetIndices< TrackerTraits >::operator()(), 
ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_fillNLayers< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_doStatsForHitInTracks< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_countSharedHit< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_markSharedHit< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_rejectDuplicate< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_fixMGPAslew::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_sharedHitCleaner< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_computation_init::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_tripletCleaner< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_simpleTripletCleaner< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_print_found_ntuplets< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_correction_and_finalize::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::SeedingTopoThresh::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::PrepareTopoInputs::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::sortByPt2().

311  {
312  return detail::UniformElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
313  }
uint32_t Idx
Definition: config.h:14

◆ uniform_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_along ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 325 of file workdivision.h.

References writedatasetfile::args.

325  {
326  return detail::UniformElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
327  }
uint32_t Idx
Definition: config.h:14

◆ uniform_elements_nd() [1/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_nd ( TAcc const &  acc)
inline

Definition at line 575 of file workdivision.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_2d::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::FillRhfIndex::operator()().

575  {
576  return detail::UniformElementsND<TAcc>(acc);
577  }

◆ uniform_elements_nd() [2/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_nd ( TAcc const &  acc,
alpaka::Vec< alpaka::Dim< TAcc >, Idx extent 
)
inline

Definition at line 580 of file workdivision.h.

580  {
581  return detail::UniformElementsND<TAcc>(acc, extent);
582  }

◆ uniform_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_x ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 337 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_y ( TAcc const &  acc,
TArgs...  args 
)
inline

◆ uniform_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_z ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 351 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_group_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 978 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_1d_and_initialize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_makeratio::operator()().

978  {
979  return detail::UniformGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
980  }
uint32_t Idx
Definition: config.h:14

◆ uniform_group_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_along ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 992 of file workdivision.h.

References writedatasetfile::args.

992  {
993  return detail::UniformGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
994  }
uint32_t Idx
Definition: config.h:14

◆ uniform_group_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_x ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1005 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_group_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_y ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1012 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_group_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_z ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 1019 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_groups()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 759 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_1d_and_initialize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_makeratio::operator()().

759  {
760  return detail::UniformGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
761  }
uint32_t Idx
Definition: config.h:14

◆ uniform_groups_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_along ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 773 of file workdivision.h.

References writedatasetfile::args.

773  {
774  return detail::UniformGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
775  }
uint32_t Idx
Definition: config.h:14

◆ uniform_groups_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_x ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 785 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_groups_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_y ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 792 of file workdivision.h.

References writedatasetfile::args.

◆ uniform_groups_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2>
ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_z ( TAcc const &  acc,
TArgs...  args 
)
inline

Definition at line 799 of file workdivision.h.

References writedatasetfile::args.

◆ warpPrefixScan() [1/2]

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::warpPrefixScan ( const TAcc &  acc,
int32_t  laneId,
T const *  ci,
T co,
uint32_t  i,
bool  active = true 
)

Definition at line 23 of file prefixScan.h.

References CMS_UNROLL_LOOP, cms::cuda::co, DTskim_cfg::dataType, mps_fire::i, hltrates_dqm_sourceclient-live_cfg::offset, and x.

Referenced by blockPrefixScan(), and warpPrefixScan().

24  {
25  // ci and co may be the same
26  T x = active ? ci[i] : 0;
28  for (int32_t offset = 1; offset < alpaka::warp::getSize(acc); offset <<= 1) {
29  // Force the exact type for integer types otherwise the compiler will find the template resolution ambiguous.
30  using dataType = std::conditional_t<std::is_floating_point_v<T>, T, std::int32_t>;
31  T y = alpaka::warp::shfl(acc, static_cast<dataType>(x), laneId - offset);
32  if (laneId >= offset)
33  x += y;
34  }
35  if (active)
36  co[i] = x;
37  }
__host__ __device__ VT * co
Definition: prefixScan.h:47
#define CMS_UNROLL_LOOP
Definition: CMSUnrollLoop.h:47
float x
long double T

◆ warpPrefixScan() [2/2]

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::warpPrefixScan ( const TAcc &  acc,
int32_t  laneId,
T c,
uint32_t  i,
bool  active = true 
)

Definition at line 40 of file prefixScan.h.

References HltBtagPostValidation_cff::c, mps_fire::i, and warpPrefixScan().

41  {
42  warpPrefixScan(acc, laneId, c, c, i, active);
43  }
ALPAKA_FN_ACC ALPAKA_FN_INLINE void warpPrefixScan(const TAcc &acc, int32_t laneId, T *c, uint32_t i, bool active=true)
Definition: prefixScan.h:40

Variable Documentation

◆ allocator_policy

template<typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
constexpr AllocatorPolicy cms::alpakatools::allocator_policy = AllocatorPolicy::Synchronous
inline

Definition at line 17 of file AllocatorPolicy.h.

Referenced by make_device_buffer(), and make_host_buffer().

◆ requires_single_thread_per_block_v

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
constexpr bool cms::alpakatools::requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value
inline

Definition at line 43 of file workdivision.h.