Namespaces
	detail

	impl

	traits

Classes
struct	AllocatorConfig

class	AtomicPairCounter

class	CachingAllocator

struct	CopyToDevice

struct	CopyToDevice< cms::alpakatest::AlpakaESTestDataB< alpaka_common::DevHost > >

struct	CopyToDevice< cms::alpakatest::AlpakaESTestDataEHost >

struct	CopyToDevice< PixelCPEFastParamsHost< TrackerTraits > >

struct	CopyToDevice< PortableHostCollection< TLayout > >

struct	CopyToDevice< PortableHostMultiCollection< TDev, T0, Args... > >

struct	CopyToDevice< PortableHostObject< TProduct > >

struct	CopyToDevice<::hcal::HcalRecoParamWithPulseShapeHost >

struct	CopyToHost

struct	CopyToHost< PortableDeviceCollection< TLayout, TDevice > >

struct	CopyToHost< PortableDeviceMultiCollection< TDev, T0, Args... > >

struct	CopyToHost< PortableDeviceObject< TProduct, TDevice > >

struct	CopyToHost< SiPixelClustersDevice< TDevice > >

struct	CopyToHost< SiPixelDigiErrorsDevice< TDevice > >

struct	CopyToHost< SiPixelDigisDevice< TDevice > >

struct	CopyToHost< TrackingRecHitDevice< TrackerTraits, TDevice > >

struct	CopyToHost< TracksDevice< TrackerTraits, TDevice > >

struct	CopyToHost< ZVertexDevice< TDevice > >

struct	countFromVector

struct	ElementIndex

class	EventCache

struct	fillFromVector

class	FlexiStorage

class	FlexiStorage< I, -1 >

class	HistoContainer

struct	multiBlockPrefixScan

class	OneToManyAssocBase

class	OneToManyAssocRandomAccess

class	OneToManyAssocSequential

class	QueueCache

struct	radixSortMultiWrapper

struct	radixSortMultiWrapper2

struct	requires_single_thread_per_block

class	ScopedContextAcquire

class	ScopedContextAnalyze

class	ScopedContextProduce

class	ScopedContextTask

struct	SimpleVector

class	VecArray

Typedefs
template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using	const_device_buffer = alpaka::ViewConst< device_buffer< TDev, T > >

template<typename T >
using	const_host_buffer = alpaka::ViewConst< host_buffer< T > >

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using	device_buffer = typename detail::buffer_type< TDev, T >::type

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
using	device_view = typename detail::view_type< TDev, T >::type

template<typename T >
using	host_buffer = typename detail::buffer_type< DevHost, T >::type

template<typename T >
using	host_view = typename detail::view_type< DevHost, T >::type

Enumerations
enum	AllocatorPolicy { AllocatorPolicy::Synchronous = 0, AllocatorPolicy::Asynchronous = 1, AllocatorPolicy::Caching = 2 }

enum	Backend : unsigned short { Backend::SerialSync = 0, Backend::CudaAsync = 1, Backend::ROCmAsync = 2, Backend::TbbAsync = 3, Backend::size }

Functions
template<typename TElem , typename TIdx , typename TExtent , typename TQueue , typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
ALPAKA_FN_HOST auto	allocCachedBuf (TDev const &dev, TQueue queue, TExtent const &extent=TExtent())

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	blockPrefixScan (const TAcc &acc, T const *ci, T *co, int32_t size, T *ws=nullptr)

template<typename TAcc , typename T >
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void	blockPrefixScan (const TAcc &acc, T *__restrict__ c, int32_t size, T *__restrict__ ws=nullptr)

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
alpaka::Dev< TPlatform > const &	chooseDevice (edm::StreamID id)

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
std::vector< alpaka::Dev< TPlatform > > const &	devices ()

constexpr Idx	divide_up_by (Idx value, Idx divisor)

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	dummyReorder (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)

template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void	fillManyFromVector (Histo *__restrict__ h, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets, uint32_t totSize, uint32_t nthreads, TQueue &queue)

template<typename TAcc , typename Histo , typename T , typename TQueue >
ALPAKA_FN_INLINE void	fillManyFromVector (Histo *__restrict__ h, typename Histo::View hv, uint32_t nh, T const *__restrict__ v, uint32_t const *__restrict__ offsets, uint32_t totSize, uint32_t nthreads, TQueue &queue)

template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	forEachInBins (Hist const &hist, V value, int n, Func func)

template<typename Hist , typename V , typename Func >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	forEachInWindow (Hist const &hist, V wmin, V wmax, Func const &func)

template<typename TDev , typename TQueue , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>
CachingAllocator< TDev, TQueue > &	getDeviceCachingAllocator (TDev const &device, AllocatorConfig const &config=AllocatorConfig{}, bool debug=false)

template<typename Event >
EventCache< Event > &	getEventCache ()

template<typename TQueue , typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
CachingAllocator< alpaka_common::DevHost, TQueue > &	getHostCachingAllocator (AllocatorConfig const &config=AllocatorConfig{}, bool debug=false)

template<typename Queue >
QueueCache< Queue > &	getQueueCache ()

alpaka::DevCpu const &	host ()

alpaka::PlatformCpu const &	host_platform ()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto	independent_group_elements (TAcc const &acc, TArgs... args)

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
ALPAKA_FN_ACC auto	independent_group_elements_along (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	independent_group_elements_x (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
ALPAKA_FN_ACC auto	independent_group_elements_y (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
ALPAKA_FN_ACC auto	independent_group_elements_z (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto	independent_groups (TAcc const &acc, TArgs... args)

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
ALPAKA_FN_ACC auto	independent_groups_along (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	independent_groups_x (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
ALPAKA_FN_ACC auto	independent_groups_y (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
ALPAKA_FN_ACC auto	independent_groups_z (TAcc const &acc, TArgs... args)

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
constexpr bool	isPowerOf2 (T v)

template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and not std::is_array_v< T >, device_buffer< TDev, T > >	make_device_buffer (TDev const &device)

template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< TDev, T > >	make_device_buffer (TDev const &device, Extent extent)

template<typename T , typename TDev >
std::enable_if_t< alpaka::isDevice< TDev > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< TDev, T > >	make_device_buffer (TDev const &device)

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and not std::is_array_v< T >, device_buffer< alpaka::Dev< TQueue >, T > >	make_device_buffer (TQueue const &queue)

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< alpaka::Dev< TQueue >, T > >	make_device_buffer (TQueue const &queue, Extent extent)

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_buffer< alpaka::Dev< TQueue >, T > >	make_device_buffer (TQueue const &queue)

template<typename T , typename TDev >
std::enable_if_t< not std::is_array_v< T >, device_view< TDev, T > >	make_device_view (TDev const &device, T &data)

template<typename T , typename TDev >
device_view< TDev, T[]>	make_device_view (TDev const &device, T *data, Extent extent)

template<typename T , typename TDev >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_view< TDev, T > >	make_device_view (TDev const &device, T &data, Extent extent)

template<typename T , typename TDev >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, device_view< TDev, T > >	make_device_view (TDev const &device, T &data)

template<typename T >
std::enable_if_t< not std::is_array_v< T >, host_buffer< T > >	make_host_buffer ()

template<typename T >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer (Extent extent)

template<typename T >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer ()

template<typename T , typename TPlatform >
std::enable_if_t< not std::is_array_v< T >, host_buffer< T > >	make_host_buffer ()

template<typename T , typename TPlatform >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer (Extent extent)

template<typename T , typename TPlatform >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer ()

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and not std::is_array_v< T >, host_buffer< T > >	make_host_buffer (TQueue const &queue)

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer (TQueue const &queue, Extent extent)

template<typename T , typename TQueue >
std::enable_if_t< alpaka::isQueue< TQueue > and cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_buffer< T > >	make_host_buffer (TQueue const &queue)

template<typename T >
std::enable_if_t< not std::is_array_v< T >, host_view< T > >	make_host_view (T &data)

template<typename T >
host_view< T[]>	make_host_view (T *data, Extent extent)

template<typename T >
std::enable_if_t< cms::is_unbounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_view< T > >	make_host_view (T &data, Extent extent)

template<typename T >
std::enable_if_t< cms::is_bounded_array_v< T > and not std::is_array_v< std::remove_extent_t< T > >, host_view< T > >	make_host_view (T &data)

template<class T >
SimpleVector< T >	make_SimpleVector (int capacity, T *data)

template<class T >
SimpleVector< T > *	make_SimpleVector (SimpleVector< T > *mem, int capacity, T *data)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
WorkDiv< Dim1D >	make_workdiv (Idx blocks, Idx elements)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
WorkDiv< alpaka::Dim< TAcc > >	make_workdiv (const Vec< alpaka::Dim< TAcc >> &blocks, const Vec< alpaka::Dim< TAcc >> &elements)

void	module_backend_config (edm::ConfigurationDescriptions &iDesc)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool	once_per_block (TAcc const &acc)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC constexpr bool	once_per_grid (TAcc const &acc)

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>
TPlatform const &	platform ()

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< std::is_unsigned< T >::value &&!requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	radixSort (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>
ALPAKA_FN_INLINE void	radixSort (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)

template<typename TAcc , typename T , int NS, typename RF >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	radixSortImpl (const TAcc &acc, T const *__restrict__ a, uint16_t *ind, uint16_t *ind2, uint32_t size, RF reorder)

template<typename TAcc , typename T , int NS = sizeof(T)>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	radixSortMulti (const TAcc &acc, T const *v, uint16_t *index, uint32_t const *offsets, uint16_t *workspace)

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	reorderFloat (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)

template<typename TAcc , typename T >
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	reorderSigned (const TAcc &acc, T const *a, uint16_t *ind, uint16_t *ind2, uint32_t size)

constexpr Idx	round_up_by (Idx value, Idx divisor)

Backend	toBackend (std::string_view name)

std::string_view	toString (Backend backend)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto	uniform_elements (TAcc const &acc, TArgs... args)

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
ALPAKA_FN_ACC auto	uniform_elements_along (TAcc const &acc, TArgs... args)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	uniform_elements_nd (TAcc const &acc)

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	uniform_elements_nd (TAcc const &acc, alpaka::Vec< alpaka::Dim< TAcc >, Idx > extent)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	uniform_elements_x (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
ALPAKA_FN_ACC auto	uniform_elements_y (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
ALPAKA_FN_ACC auto	uniform_elements_z (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto	uniform_group_elements (TAcc const &acc, TArgs... args)

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
ALPAKA_FN_ACC auto	uniform_group_elements_along (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	uniform_group_elements_x (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
ALPAKA_FN_ACC auto	uniform_group_elements_y (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
ALPAKA_FN_ACC auto	uniform_group_elements_z (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
ALPAKA_FN_ACC auto	uniform_groups (TAcc const &acc, TArgs... args)

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
ALPAKA_FN_ACC auto	uniform_groups_along (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
ALPAKA_FN_ACC auto	uniform_groups_x (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
ALPAKA_FN_ACC auto	uniform_groups_y (TAcc const &acc, TArgs... args)

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
ALPAKA_FN_ACC auto	uniform_groups_z (TAcc const &acc, TArgs... args)

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	warpPrefixScan (const TAcc &acc, int32_t laneId, T const *ci, T *co, uint32_t i, bool active=true)

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void	warpPrefixScan (const TAcc &acc, int32_t laneId, T *c, uint32_t i, bool active=true)

Variables
template<typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev>>>
constexpr AllocatorPolicy	allocator_policy = AllocatorPolicy::Synchronous

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
constexpr bool	requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value

Typedef Documentation

◆ const_device_buffer

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>

using cms::alpakatools::const_device_buffer = typedef alpaka::ViewConst<device_buffer<TDev, T> >

Definition at line 180 of file memory.h.

◆ const_host_buffer

template<typename T >

using cms::alpakatools::const_host_buffer = typedef alpaka::ViewConst<host_buffer<T> >

Definition at line 60 of file memory.h.

◆ device_buffer

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>

using cms::alpakatools::device_buffer = typedef typename detail::buffer_type<TDev, T>::type

Definition at line 177 of file memory.h.

◆ device_view

template<typename TDev , typename T , typename = std::enable_if_t<alpaka::isDevice<TDev>>>

using cms::alpakatools::device_view = typedef typename detail::view_type<TDev, T>::type

Definition at line 257 of file memory.h.

◆ host_buffer

template<typename T >

using cms::alpakatools::host_buffer = typedef typename detail::buffer_type<DevHost, T>::type

Definition at line 57 of file memory.h.

◆ host_view

template<typename T >

using cms::alpakatools::host_view = typedef typename detail::view_type<DevHost, T>::type

Definition at line 150 of file memory.h.

Enumeration Type Documentation

◆ AllocatorPolicy

enum cms::alpakatools::AllocatorPolicy

strong

Enumerator
Synchronous
Asynchronous
Caching

Definition at line 14 of file AllocatorPolicy.h.

14 { Synchronous = 0, Asynchronous = 1, Caching = 2 };

cms::alpakatools::AllocatorPolicy::Caching

cms::alpakatools::AllocatorPolicy::Synchronous

cms::alpakatools::AllocatorPolicy::Asynchronous

◆ Backend

enum cms::alpakatools::Backend : unsigned short

strong

Enumerator
SerialSync
CudaAsync
ROCmAsync
TbbAsync
size

Definition at line 8 of file Backend.h.

8 : unsigned short { SerialSync = 0, CudaAsync = 1, ROCmAsync = 2, TbbAsync = 3, size };

findQualityFiles.size

size

Write out results.

Definition: findQualityFiles.py:443

cms::alpakatools::Backend::TbbAsync

cms::alpakatools::Backend::ROCmAsync

cms::alpakatools::Backend::CudaAsync

cms::alpakatools::Backend::SerialSync

Function Documentation

◆ allocCachedBuf()

template<typename TElem , typename TIdx , typename TExtent , typename TQueue , typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>

ALPAKA_FN_HOST auto cms::alpakatools::allocCachedBuf	(	TDev const &	dev,
		TQueue	queue,
		TExtent const &	extent = `TExtent()`
	)

Definition at line 197 of file CachedBufAlloc.h.

References createBeamHaloJobs::queue.

                                                                                                        {
     return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent);
   }

◆ blockPrefixScan() [1/2]

template<typename TAcc , typename T >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::blockPrefixScan	(	const TAcc &	acc,
		T const *	ci,
		T *	co,
		int32_t	size,
		T *	ws = `nullptr`
	)

Definition at line 47 of file prefixScan.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::co, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), dqmdumpme::first, mps_fire::i, isPowerOf2(), warpPrefixScan(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

Referenced by cms::alpakatools::OneToManyAssocRandomAccess< I, NHISTS *NBINS+1, SIZE >::finalize(), gpuClustering::for(), pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), cms::alpakatools::multiBlockPrefixScan< T >::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::FillHitsModuleStart< TrackerTraits >::operator()().

                                                                           {
     if constexpr (!requires_single_thread_per_block_v<TAcc>) {
       const auto warpSize = alpaka::warp::getSize(acc);
       int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
       int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
       ALPAKA_ASSERT_ACC(ws);
       ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
       ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
       auto first = blockThreadIdx;
       ALPAKA_ASSERT_ACC(isPowerOf2(warpSize));
       auto laneId = blockThreadIdx & (warpSize - 1);
       auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;
 
       for (auto i = first; i < warpUpRoundedSize; i += blockDimension) {
         // When padding the warp, warpPrefixScan is a noop
         warpPrefixScan(acc, laneId, ci, co, i, i < size);
         if (i < size) {
           // Skipped in warp padding threads.
           auto warpId = i / warpSize;
           ALPAKA_ASSERT_ACC(warpId < warpSize);
           if ((warpSize - 1) == laneId)
             ws[warpId] = co[i];
         }
       }
       alpaka::syncBlockThreads(acc);
       if (size <= warpSize)
         return;
       if (blockThreadIdx < warpSize) {
         warpPrefixScan(acc, laneId, ws, blockThreadIdx);
       }
       alpaka::syncBlockThreads(acc);
       for (auto i = first + warpSize; i < size; i += blockDimension) {
         int32_t warpId = i / warpSize;
         co[i] += ws[warpId - 1];
       }
       alpaka::syncBlockThreads(acc);
     } else {
       co[0] = ci[0];
       for (int32_t i = 1; i < size; ++i)
         co[i] = ci[i] + co[i - 1];
     }
   }

◆ blockPrefixScan() [2/2]

template<typename TAcc , typename T >

ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void cms::alpakatools::blockPrefixScan	(	const TAcc &	acc,
		T *__restrict__	c,
		int32_t	size,
		T *__restrict__	ws = `nullptr`
	)

Definition at line 92 of file prefixScan.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), HltBtagPostValidation_cff::c, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), dqmdumpme::first, mps_fire::i, warpPrefixScan(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

                                                                                          {
     if constexpr (!requires_single_thread_per_block_v<TAcc>) {
       const auto warpSize = alpaka::warp::getSize(acc);
       int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
       int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
       ALPAKA_ASSERT_ACC(ws);
       ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
       ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
       auto first = blockThreadIdx;
       auto laneId = blockThreadIdx & (warpSize - 1);
       auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;
 
       for (auto i = first; i < warpUpRoundedSize; i += blockDimension) {
         // When padding the warp, warpPrefixScan is a noop
         warpPrefixScan(acc, laneId, c, i, i < size);
         if (i < size) {
           // Skipped in warp padding threads.
           auto warpId = i / warpSize;
           ALPAKA_ASSERT_ACC(warpId < warpSize);
           if ((warpSize - 1) == laneId)
             ws[warpId] = c[i];
         }
       }
       alpaka::syncBlockThreads(acc);
       if (size <= warpSize)
         return;
       if (blockThreadIdx < warpSize) {
         warpPrefixScan(acc, laneId, ws, blockThreadIdx);
       }
       alpaka::syncBlockThreads(acc);
       for (auto i = first + warpSize; i < size; i += blockDimension) {
         auto warpId = i / warpSize;
         c[i] += ws[warpId - 1];
       }
       alpaka::syncBlockThreads(acc);
     } else {
       for (int32_t i = 1; i < size; ++i)
         c[i] += c[i - 1];
     }
   }

◆ chooseDevice()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>

alpaka::Dev<TPlatform> const& cms::alpakatools::chooseDevice ( edm::StreamID id )

Definition at line 16 of file chooseDevice.h.

References cms::Exception::addContext(), and devices().

                                                            {
     edm::Service<ALPAKA_TYPE_ALIAS(AlpakaService)> service;
     if (not service->enabled()) {
       cms::Exception ex("RuntimeError");
       ex << "Unable to choose current device because " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " is disabled.\n"
          << "If " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " was not explicitly disabled in the configuration,\n"
          << "the probable cause is that there is no accelerator or there is some problem\n"
          << "with the accelerator runtime or drivers.";
       ex.addContext("Calling cms::alpakatools::chooseDevice()");
       throw ex;
     }
 
     // For starters we "statically" assign the device based on
     // edm::Stream number. This is suboptimal if the number of
     // edm::Streams is not a multiple of the number of devices
     // (and even then there is no load balancing).
 
     // TODO: improve the "assignment" logic
     auto const& devices = cms::alpakatools::devices<TPlatform>();
     return devices[id % devices.size()];
   }

◆ devices()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>

std::vector<alpaka::Dev<TPlatform> > const& cms::alpakatools::devices ( )

inline

Definition at line 22 of file devices.h.

Referenced by cms::alpakatools::detail::allocate_device_allocators(), ALPAKA_ACCELERATOR_NAMESPACE::AlpakaService::AlpakaService(), SiStripFedCablingBuilderFromDb::buildFecCabling(), ALPAKA_ACCELERATOR_NAMESPACE::detail::chooseDevice(), chooseDevice(), FastFedCablingHistosUsingDb::connections(), SiStripConfigDb::printDeviceDescriptions(), ALPAKA_ACCELERATOR_NAMESPACE::ESProducer::setWhatProducedDevice(), cms::cudatest::testDevices(), cms::rocmtest::testDevices(), ApvTimingHistosUsingDb::update(), OptoScanHistosUsingDb::update(), VpspScanHistosUsingDb::update(), LatencyHistosUsingDb::update(), CalibrationHistosUsingDb::update(), FineDelayHistosUsingDb::update(), VpspScanHistosUsingDb::uploadConfigurations(), OptoScanHistosUsingDb::uploadConfigurations(), ApvTimingHistosUsingDb::uploadConfigurations(), LatencyHistosUsingDb::uploadConfigurations(), CalibrationHistosUsingDb::uploadConfigurations(), and FineDelayHistosUsingDb::uploadConfigurations().

                                                           {
     // enumerate all devices the first time that this function is called
     static const auto devices = alpaka::getDevs(platform<TPlatform>());
     return devices;
   }

◆ divide_up_by()

constexpr Idx cms::alpakatools::divide_up_by	(	Idx	value,
		Idx	divisor
	)

inline

Definition at line 20 of file workdivision.h.

20 { return (value + divisor - 1) / divisor; }

value

Definition: value.py:1

◆ dummyReorder()

template<typename TAcc , typename T >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::dummyReorder	(	const TAcc &	acc,
		T const *	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size
	)

Definition at line 17 of file radixSort.h.

18 {}

◆ fillManyFromVector() [1/2]

template<typename TAcc , typename Histo , typename T , typename TQueue >

ALPAKA_FN_INLINE void cms::alpakatools::fillManyFromVector	(	Histo *__restrict__	h,
		uint32_t	nh,
		T const *__restrict__	v,
		uint32_t const *__restrict__	offsets,
		uint32_t	totSize,
		uint32_t	nthreads,
		TQueue &	queue
	)

Definition at line 59 of file HistoContainer.h.

References divide_up_by(), h, cms::cuda::nh, cms::cuda::nthreads, cms::cuda::offsets, createBeamHaloJobs::queue, svgfig::template(), cms::cuda::totSize, and cms::cuda::v.

Referenced by SiPixelRecHitSoAFromLegacyT< TrackerTraits >::produce().

                                                           {
     Histo::template launchZero<TAcc>(h, queue);
 
     const auto threadsPerBlockOrElementsPerThread = nthreads;
     const auto blocksPerGrid = divide_up_by(totSize, nthreads);
     const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
 
     alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
     Histo::template launchFinalize<TAcc>(h, queue);
 
     alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
   }

◆ fillManyFromVector() [2/2]

template<typename TAcc , typename Histo , typename T , typename TQueue >

ALPAKA_FN_INLINE void cms::alpakatools::fillManyFromVector	(	Histo *__restrict__	h,
		typename Histo::View	hv,
		uint32_t	nh,
		T const *__restrict__	v,
		uint32_t const *__restrict__	offsets,
		uint32_t	totSize,
		uint32_t	nthreads,
		TQueue &	queue
	)

Definition at line 79 of file HistoContainer.h.

References divide_up_by(), h, cms::cuda::nh, cms::cuda::nthreads, cms::cuda::offsets, createBeamHaloJobs::queue, svgfig::template(), cms::cuda::totSize, and cms::cuda::v.

                                                           {
     Histo::template launchZero<TAcc>(hv, queue);
 
     const auto threadsPerBlockOrElementsPerThread = nthreads;
     const auto blocksPerGrid = divide_up_by(totSize, nthreads);
     const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
 
     alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
     Histo::template launchFinalize<TAcc>(h, queue);
 
     alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
   }

◆ forEachInBins()

template<typename Hist , typename V , typename Func >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::forEachInBins	(	Hist const &	hist,
		V	value,
		int	n,
		Func	func
	)

Definition at line 101 of file HistoContainer.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::be, newFWLiteAna::bin, cms::cuda::bs, cms::cuda::func, compareTotals::hist, SiStripPI::max, SiStripPI::min, cms::cuda::n, and LaserClient_cfi::nbins.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN::operator()().

                                                                                                  {
     int bs = Hist::bin(value);
     int be = std::min(int(Hist::nbins() - 1), bs + n);
     bs = std::max(0, bs - n);
     ALPAKA_ASSERT_ACC(be >= bs);
     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
       func(*pj);
     }
   }

◆ forEachInWindow()

template<typename Hist , typename V , typename Func >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::forEachInWindow	(	Hist const &	hist,
		V	wmin,
		V	wmax,
		Func const &	func
	)

Definition at line 113 of file HistoContainer.h.

References ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::be, newFWLiteAna::bin, cms::cuda::bs, cms::cuda::func, compareTotals::hist, cms::cuda::wmax, and cms::cuda::wmin.

                                                                                                           {
     auto bs = Hist::bin(wmin);
     auto be = Hist::bin(wmax);
     ALPAKA_ASSERT_ACC(be >= bs);
     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
       func(*pj);
     }
   }

◆ getDeviceCachingAllocator()

template<typename TDev , typename TQueue , typename = std::enable_if_t<alpaka::isDevice<TDev> and alpaka::isQueue<TQueue>>>

CachingAllocator<TDev, TQueue>& cms::alpakatools::getDeviceCachingAllocator	(	TDev const &	device,
		AllocatorConfig const &	config = `AllocatorConfig{}`,
		bool	debug = `false`
	)

inline

Definition at line 74 of file getDeviceCachingAllocator.h.

                                                                                                                   {},
                                                                    bool debug = false) {
     // NOTE(review): this fragment starts inside the parameter list - the leading "{}" closes the
     // default value of `config`; the function body proper begins after "bool debug = false) {".

     // initialise all allocators, one per device
     // (function-local static: constructed once, from the config/debug of the first call)
     CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators<TDev, TQueue>(config, debug);
 
     // the native handle of the device is used directly as the position in `allocators`
     size_t const index = alpaka::getNativeHandle(device);
     assert(index < cms::alpakatools::devices<alpaka::Platform<TDev>>().size());
 
     // the public interface is thread safe
     return allocators[index];
   }

◆ getEventCache()

template<typename Event >

EventCache<Event>& cms::alpakatools::getEventCache ( )

Definition at line 96 of file EventCache.h.

References utilities::cache(), and CMS_THREAD_SAFE.

                                      {
     // the public interface is thread safe
     CMS_THREAD_SAFE static EventCache<Event> cache;
     return cache;
   }

◆ getHostCachingAllocator()

template<typename TQueue , typename = std::enable_if_t<alpaka::isQueue<TQueue>>>

CachingAllocator<alpaka_common::DevHost, TQueue>& cms::alpakatools::getHostCachingAllocator	(	AllocatorConfig const &	config = `AllocatorConfig{}`,
		bool	debug = `false`
	)

inline

Definition at line 16 of file getHostCachingAllocator.h.

                                                      {}, bool debug = false) {
     // NOTE(review): this fragment starts inside the parameter list - the leading "{}" closes the
     // default value of `config`; the function body proper begins after "bool debug = false) {".

     // thread safe initialisation of the host allocator
     // (function-local static: constructed once, from the config/debug of the first call)
     CMS_THREAD_SAFE static CachingAllocator<alpaka_common::DevHost, TQueue> allocator(
         host(),
         config,
         false,  // reuseSameQueueAllocations
         debug);
 
     // the public interface is thread safe
     return allocator;
   }

◆ getQueueCache()

template<typename Queue >

QueueCache<Queue>& cms::alpakatools::getQueueCache ( )

Definition at line 65 of file QueueCache.h.

References utilities::cache(), and CMS_THREAD_SAFE.

                                      {
     // the public interface is thread safe
     CMS_THREAD_SAFE static QueueCache<Queue> cache;
     return cache;
   }

◆ host()

alpaka::DevCpu const& cms::alpakatools::host ( )

inline

Definition at line 14 of file host.h.

14 { return devices<alpaka::PlatformCpu>()[0]; }  // the host device is element 0 of devices<alpaka::PlatformCpu>()

◆ host_platform()

alpaka::PlatformCpu const& cms::alpakatools::host_platform ( )

inline

Definition at line 11 of file host.h.

11 { return platform<alpaka::PlatformCpu>(); }  // the host platform is the platform<>() instance for alpaka::PlatformCpu

◆ independent_group_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1329 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::for(), pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelRecHits::GetHits< TrackerTraits >::operator()(), cms::alpakatools::OneToManyAssocBase< I, ONES, SIZE >::zeroAndInit::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::FindClus< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::FillHitsModuleStart< TrackerTraits >::operator()(), radixSortImpl(), reorderFloat(), and reorderSigned().

                                                                                        {
     // Forwards to detail::IndependentGroupElementsAlong for dimension 0,
     // converting every extra argument to Idx.
     return detail::IndependentGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
   }

◆ independent_group_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_along	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1343 of file workdivision.h.

References writedatasetfile::args.

                                                                                              {
     // Forwards to detail::IndependentGroupElementsAlong for the dimension given by the
     // template parameter Dim, converting every extra argument to Idx.
     return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
   }

◆ independent_group_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_x	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1356 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::for(), ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::CAFishbone< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep1d_sameNumberOfSamples::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_connect< TrackerTraits >::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep_pulseMatrices_sameNumberOfSamples::operator()().

                                                                                          {
     // "x" is dimension index alpaka::Dim<TAcc>::value - 1, i.e. the last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
   }

◆ independent_group_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_y	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1363 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep_pulseMatrices_sameNumberOfSamples::operator()().

                                                                                          {
     // "y" is dimension index alpaka::Dim<TAcc>::value - 2, i.e. the second-to-last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
   }

◆ independent_group_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_group_elements_z	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1370 of file workdivision.h.

References writedatasetfile::args.

                                                                                          {
     // "z" is dimension index alpaka::Dim<TAcc>::value - 3, i.e. the third-to-last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
   }

◆ independent_groups()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_groups	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1170 of file workdivision.h.

References writedatasetfile::args.

Referenced by pixelClustering::ClusterChargeCut< TrackerTraits >::operator()(), ALPAKA_ACCELERATOR_NAMESPACE::pixelRecHits::GetHits< TrackerTraits >::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering::FindClus< TrackerTraits >::operator()().

                                                                                {
     // Forwards to detail::IndependentGroupsAlong for dimension 0,
     // converting every extra argument to Idx.
     return detail::IndependentGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
   }

◆ independent_groups_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_along	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1184 of file workdivision.h.

References writedatasetfile::args.

                                                                                      {
     // Forwards to detail::IndependentGroupsAlong for the dimension given by the
     // template parameter Dim, converting every extra argument to Idx.
     return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
   }

◆ independent_groups_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_x	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1197 of file workdivision.h.

References writedatasetfile::args.

                                                                                  {
     // "x" is dimension index alpaka::Dim<TAcc>::value - 1, i.e. the last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
   }

◆ independent_groups_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_y	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1204 of file workdivision.h.

References writedatasetfile::args.

                                                                                  {
     // "y" is dimension index alpaka::Dim<TAcc>::value - 2, i.e. the second-to-last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
   }

◆ independent_groups_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>

ALPAKA_FN_ACC auto cms::alpakatools::independent_groups_z	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1211 of file workdivision.h.

References writedatasetfile::args.

                                                                                  {
     // "z" is dimension index alpaka::Dim<TAcc>::value - 3, i.e. the third-to-last one;
     // every extra argument is converted to Idx.
     return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
   }

◆ isPowerOf2()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>

constexpr bool cms::alpakatools::isPowerOf2 ( T v )

Definition at line 11 of file prefixScan.h.

References findQualityFiles::v.

Referenced by blockPrefixScan().

                                  {
     // returns true iif v has only one bit set.
     while (v) {
       if (v & 1)
         return !(v >> 1);
       else
         v >>= 1;
     }
     return false;
   }

◆ make_device_buffer() [1/6]

template<typename T , typename TDev >

std::enable_if_t<alpaka::isDevice<TDev> and not std::is_array_v<T>, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer ( TDev const & device )

Definition at line 185 of file memory.h.

                           {
     // Non-array T: alpaka::allocBuf on `device` with a Scalar{} extent.
     return alpaka::allocBuf<T, Idx>(device, Scalar{});
   }

◆ make_device_buffer() [2/6]

template<typename T , typename TDev >

std::enable_if_t<alpaka::isDevice<TDev> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer	(	TDev const &	device,
		Extent	extent
	)

Definition at line 194 of file memory.h.

                                                         {
     // Unbounded array T: the element type is std::remove_extent_t<T> and the
     // extent is the run-time argument, wrapped in a Vec1D.
     return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(device, Vec1D{extent});
   }

◆ make_device_buffer() [3/6]

template<typename T , typename TDev >

std::enable_if_t<alpaka::isDevice<TDev> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<TDev, T> > cms::alpakatools::make_device_buffer ( TDev const & device )

Definition at line 202 of file memory.h.

                                          {
     // Bounded array T: the element type is std::remove_extent_t<T> and the
     // extent is the compile-time std::extent_v<T>.
     return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(device, Vec1D{std::extent_v<T>});
   }

◆ make_device_buffer() [4/6]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and not std::is_array_v<T>, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer ( TQueue const & queue )

Definition at line 210 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

                                           {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<T, Idx>(alpaka::getDev(queue), queue, Scalar{});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
       return alpaka::allocAsyncBuf<T, Idx>(queue, Scalar{});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
       return alpaka::allocBuf<T, Idx>(alpaka::getDev(queue), Scalar{});
     }
   }

◆ make_device_buffer() [5/6]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer	(	TQueue const &	queue,
		Extent	extent
	)

Definition at line 226 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

                                                          {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), queue, Vec1D{extent});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
       return alpaka::allocAsyncBuf<std::remove_extent_t<T>, Idx>(queue, Vec1D{extent});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
       return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), Vec1D{extent});
     }
   }

◆ make_device_buffer() [6/6]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_buffer<alpaka::Dev<TQueue>, T> > cms::alpakatools::make_device_buffer ( TQueue const & queue )

Definition at line 242 of file memory.h.

References allocator_policy, Asynchronous, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), createBeamHaloJobs::queue, and Synchronous.

                                           {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), queue, Vec1D{std::extent_v<T>});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Asynchronous) {
       return alpaka::allocAsyncBuf<std::remove_extent_t<T>, Idx>(queue, Vec1D{std::extent_v<T>});
     }
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Synchronous) {
       return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(alpaka::getDev(queue), Vec1D{std::extent_v<T>});
     }
   }

◆ make_device_view() [1/4]

template<typename T , typename TDev >

std::enable_if_t<not std::is_array_v<T>, device_view<TDev, T> > cms::alpakatools::make_device_view	(	TDev const &	device,
		T &	data
	)

Definition at line 260 of file memory.h.

References data.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::SiPixelRawToClusterKernel< pixelTopology::Phase2 >::makePhase1ClustersAsync(), ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::SiPixelRawToClusterKernel< pixelTopology::Phase2 >::makePhase2ClustersAsync(), and TrackingRecHitDevice< TrackerTraits, TDev >::updateFromDevice().

                                                                                                               {
     // Non-array T: a zero-dimensional (Dim0D) view over the address of `data`, tagged with `device`.
     return alpaka::ViewPlainPtr<TDev, T, Dim0D, Idx>(&data, device, Scalar{});
   }

◆ make_device_view() [2/4]

template<typename T , typename TDev >

device_view<TDev, T[]> cms::alpakatools::make_device_view	(	TDev const &	device,
		T *	data,
		Extent	extent
	)

Definition at line 265 of file memory.h.

References data.

                                                                                       {
     // Pointer plus run-time extent: a one-dimensional (Dim1D) view over `data`, tagged with `device`.
     return alpaka::ViewPlainPtr<TDev, T, Dim1D, Idx>(data, device, Vec1D{extent});
   }

◆ make_device_view() [3/4]

template<typename T , typename TDev >

std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_view<TDev, T> > cms::alpakatools::make_device_view	(	TDev const &	device,
		T &	data,
		Extent	extent
	)

Definition at line 271 of file memory.h.

References data.

                                                                {
     // Unbounded array T: a Dim1D view with element type std::remove_extent_t<T>
     // and the run-time extent; `data` itself is passed as the pointer argument.
     return alpaka::ViewPlainPtr<TDev, std::remove_extent_t<T>, Dim1D, Idx>(data, device, Vec1D{extent});
   }

◆ make_device_view() [4/4]

template<typename T , typename TDev >

std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, device_view<TDev, T> > cms::alpakatools::make_device_view	(	TDev const &	device,
		T &	data
	)

Definition at line 277 of file memory.h.

References data.

                                                 {
     // Bounded array T: a Dim1D view with element type std::remove_extent_t<T>
     // and the compile-time extent std::extent_v<T>.
     return alpaka::ViewPlainPtr<TDev, std::remove_extent_t<T>, Dim1D, Idx>(data, device, Vec1D{std::extent_v<T>});
   }

◆ make_host_buffer() [1/9]

template<typename T >

std::enable_if_t<not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 65 of file memory.h.

References host().

                                                                               {
     // Non-array T: alpaka::allocBuf on the host() device with a Scalar{} extent.
     return alpaka::allocBuf<T, Idx>(host(), Scalar{});
   }

◆ make_host_buffer() [2/9]

template<typename T >

std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( Extent extent )

Definition at line 71 of file memory.h.

References host().

                                   {
     // Unbounded array T: element type std::remove_extent_t<T>, run-time extent, on host().
     return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(host(), Vec1D{extent});
   }

◆ make_host_buffer() [3/9]

template<typename T >

std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 77 of file memory.h.

References host().

                      {
     // Bounded array T: element type std::remove_extent_t<T>, extent std::extent_v<T>, on host().
     return alpaka::allocBuf<std::remove_extent_t<T>, Idx>(host(), Vec1D{std::extent_v<T>});
   }

◆ make_host_buffer() [4/9]

template<typename T , typename TPlatform >

std::enable_if_t<not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 85 of file memory.h.

References host().

                                                                           {
     using Platform = TPlatform;
     return alpaka::allocMappedBuf<Platform, T, Idx>(host(), platform<Platform>(), Scalar{});
   }

◆ make_host_buffer() [5/9]

template<typename T , typename TPlatform >

std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( Extent extent )

Definition at line 92 of file memory.h.

References host().

                                   {
     using Platform = TPlatform;
     return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(host(), platform<Platform>(), Vec1D{extent});
   }

◆ make_host_buffer() [6/9]

template<typename T , typename TPlatform >

std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( )

Definition at line 99 of file memory.h.

References host().

                      {
     using Platform = TPlatform;
     return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
         host(), platform<Platform>(), Vec1D{std::extent_v<T>});
   }

◆ make_host_buffer() [7/9]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and not std::is_array_v<T>, host_buffer<T> > cms::alpakatools::make_host_buffer ( TQueue const & queue )

Definition at line 109 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

                            {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<T, Idx>(host(), queue, Scalar{});
     } else {
       using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
       return alpaka::allocMappedBuf<Platform, T, Idx>(host(), platform<Platform>(), Scalar{});
     }
   }

◆ make_host_buffer() [8/9]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer	(	TQueue const &	queue,
		Extent	extent
	)

Definition at line 123 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

                                                        {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<std::remove_extent_t<T>, Idx>(host(), queue, Vec1D{extent});
     } else {
       using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
       return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
           host(), platform<Platform>(), Vec1D{extent});
     }
   }

◆ make_host_buffer() [9/9]

template<typename T , typename TQueue >

std::enable_if_t<alpaka::isQueue<TQueue> and cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_buffer<T> > cms::alpakatools::make_host_buffer ( TQueue const & queue )

Definition at line 137 of file memory.h.

References allocator_policy, Caching, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), host(), and createBeamHaloJobs::queue.

                                         {
     if constexpr (allocator_policy<alpaka::Dev<TQueue>> == AllocatorPolicy::Caching) {
       return allocCachedBuf<std::remove_extent_t<T>, Idx>(host(), queue, Vec1D{std::extent_v<T>});
     } else {
       using Platform = alpaka::Platform<alpaka::Dev<TQueue>>;
       return alpaka::allocMappedBuf<Platform, std::remove_extent_t<T>, Idx>(
           host(), platform<Platform>(), Vec1D{std::extent_v<T>});
     }
   }

◆ make_host_view() [1/4]

template<typename T >

std::enable_if_t<not std::is_array_v<T>, host_view<T> > cms::alpakatools::make_host_view ( T & data )

Definition at line 153 of file memory.h.

References data, and host().

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::SiPixelRawToClusterKernel< pixelTopology::Phase2 >::makePhase1ClustersAsync(), ALPAKA_ACCELERATOR_NAMESPACE::pixelDetails::SiPixelRawToClusterKernel< pixelTopology::Phase2 >::makePhase2ClustersAsync(), and TrackingRecHitDevice< TrackerTraits, TDev >::updateFromDevice().

                                                                                  {
     // Non-array T: a zero-dimensional (Dim0D) view over the address of `data`, tagged with host().
     return alpaka::ViewPlainPtr<DevHost, T, Dim0D, Idx>(&data, host(), Scalar{});
   }

◆ make_host_view() [2/4]

template<typename T >

host_view<T[]> cms::alpakatools::make_host_view	(	T *	data,
		Extent	extent
	)

Definition at line 158 of file memory.h.

References data, and host().

                                                         {
     // Pointer plus run-time extent: a one-dimensional (Dim1D) view over `data`, tagged with host().
     return alpaka::ViewPlainPtr<DevHost, T, Dim1D, Idx>(data, host(), Vec1D{extent});
   }

◆ make_host_view() [3/4]

template<typename T >

std::enable_if_t<cms::is_unbounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_view<T> > cms::alpakatools::make_host_view	(	T &	data,
		Extent	extent
	)

Definition at line 164 of file memory.h.

References data, and host().

                                          {
     // Unbounded array T: a Dim1D view with element type std::remove_extent_t<T>
     // and the run-time extent; `data` itself is passed as the pointer argument.
     return alpaka::ViewPlainPtr<DevHost, std::remove_extent_t<T>, Dim1D, Idx>(data, host(), Vec1D{extent});
   }

◆ make_host_view() [4/4]

template<typename T >

std::enable_if_t<cms::is_bounded_array_v<T> and not std::is_array_v<std::remove_extent_t<T> >, host_view<T> > cms::alpakatools::make_host_view ( T & data )

Definition at line 170 of file memory.h.

References data, and host().

                           {
     // Bounded array T: a Dim1D view with element type std::remove_extent_t<T>
     // and the compile-time extent std::extent_v<T>.
     return alpaka::ViewPlainPtr<DevHost, std::remove_extent_t<T>, Dim1D, Idx>(data, host(), Vec1D{std::extent_v<T>});
   }

◆ make_SimpleVector() [1/2]

template<class T >

SimpleVector<T> cms::alpakatools::make_SimpleVector	(	int	capacity,
		T *	data
	)

Definition at line 126 of file SimpleVector.h.

References gpuVertexFinder::capacity(), data, and runTheMatrix::ret.

                                                            {
     SimpleVector<T> ret;
     ret.construct(capacity, data);
     return ret;
   }

◆ make_SimpleVector() [2/2]

template<class T >

SimpleVector<T>* cms::alpakatools::make_SimpleVector	(	SimpleVector< T > *	mem,
		int	capacity,
		T *	data
	)

Definition at line 134 of file SimpleVector.h.

References gpuVertexFinder::capacity(), data, mem, and runTheMatrix::ret.

                                                                                   {
     auto ret = new (mem) SimpleVector<T>();
     ret->construct(capacity, data);
     return ret;
   }

◆ make_workdiv() [1/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

WorkDiv<Dim1D> cms::alpakatools::make_workdiv	(	Idx	blocks,
		Idx	elements
	)

inline

Definition at line 47 of file workdivision.h.

References gather_cfg::blocks, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and bookConverter::elements.

                                                                {
     if constexpr (not requires_single_thread_per_block_v<TAcc>) {
       // On GPU backends, each thread is looking at a single element:
       //   - the number of threads per block is "elements";
       //   - the number of elements per thread is always 1.
       return WorkDiv<Dim1D>(blocks, elements, Idx{1});
     } else {
       // On CPU backends, run serially with a single thread per block:
       //   - the number of threads per block is always 1;
       //   - the number of elements per thread is "elements".
       return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
     }
   }

◆ make_workdiv() [2/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

WorkDiv<alpaka::Dim<TAcc> > cms::alpakatools::make_workdiv	(	const Vec< alpaka::Dim< TAcc >> &	blocks,
		const Vec< alpaka::Dim< TAcc >> &	elements
	)

inline

Definition at line 63 of file workdivision.h.

References gather_cfg::blocks, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), and bookConverter::elements.

                                                                                        {
     using Dim = alpaka::Dim<TAcc>;
     if constexpr (not requires_single_thread_per_block_v<TAcc>) {
       // On GPU backends, each thread is looking at a single element:
       //   - the number of threads per block is "elements";
       //   - the number of elements per thread is always 1.
       return WorkDiv<Dim>(blocks, elements, Vec<Dim>::ones());
     } else {
       // On CPU backends, run serially with a single thread per block:
       //   - the number of threads per block is always 1;
       //   - the number of elements per thread is "elements".
       return WorkDiv<Dim>(blocks, Vec<Dim>::ones(), elements);
     }
   }

◆ module_backend_config()

void cms::alpakatools::module_backend_config ( edm::ConfigurationDescriptions & iDesc )

Definition at line 13 of file module_backend_config.cc.

References edm::ParameterSetDescription::addUntracked(), edm::ConfigurationDescriptions::defaultDescription(), edm::ParameterSetDescription::isLabelUnused(), kComment, AlCaHLTBitMon_QueryRunRegistry::string, and findQualityFiles::v.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ESProducer::prevalidate(), and ALPAKA_ACCELERATOR_NAMESPACE::ProducerBase< BaseT, Args >::prevalidate().

                                                                    {
     // Builds a PSet description holding one untracked string "backend" (default ""), then adds it
     // under kPSetName to the default description and to every description in iDesc that does not
     // already use that label.

     // the code below leads to 'alpaka = untracked.PSet(backend = untracked.string)' to be added to the generated cfi files
     // TODO: I don't know if this is a desired behavior for HLT
     edm::ParameterSetDescription descAlpaka;
     descAlpaka.addUntracked<std::string>("backend", "")
         ->setComment(
             "Alpaka backend for this module. Can be empty string (for the global default), 'serial_sync', or "
             " - depending on the architecture and available hardware - 'cuda_async', 'rocm_async'");
 
     // default description: only touched when one exists and the label is still free
     if (iDesc.defaultDescription()) {
       if (iDesc.defaultDescription()->isLabelUnused(kPSetName)) {
         iDesc.defaultDescription()
             ->addUntracked<edm::ParameterSetDescription>(kPSetName, descAlpaka)
             ->setComment(kComment);
       }
     }
     // per-entry descriptions (v.second): same rule, skip entries where the label is already used
     for (auto& v : iDesc) {
       if (v.second.isLabelUnused(kPSetName)) {
         v.second.addUntracked<edm::ParameterSetDescription>(kPSetName, descAlpaka)->setComment(kComment);
       }
     }
   }

◆ once_per_block()

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

ALPAKA_FN_ACC constexpr bool cms::alpakatools::once_per_block ( TAcc const & acc )

inline

Definition at line 1394 of file workdivision.h.

                                                                       {
     // True when the thread index within the block (alpaka::Block, alpaka::Threads)
     // equals the all-zeros vector in every dimension.
     return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) == Vec<alpaka::Dim<TAcc>>::zeros();
   }

◆ once_per_grid()

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

ALPAKA_FN_ACC constexpr bool cms::alpakatools::once_per_grid ( TAcc const & acc )

inline

Definition at line 1382 of file workdivision.h.

                                                                      {
     // True when the thread index within the grid (alpaka::Grid, alpaka::Threads)
     // equals the all-zeros vector in every dimension.
     return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) == Vec<alpaka::Dim<TAcc>>::zeros();
   }

◆ platform()

template<typename TPlatform , typename = std::enable_if_t<alpaka::isPlatform<TPlatform>>>

TPlatform const& cms::alpakatools::platform ( )

inline

Definition at line 14 of file devices.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::AlpakaService::AlpakaService().

                                      {
     // initialise the platform the first time that this function is called
     static const auto platform = TPlatform{};
     return platform;
   }

◆ radixSort() [1/2]

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< std::is_unsigned< T >::value &&!requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSort	(	const TAcc &	acc,
		T const *	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size
	)

Definition at line 324 of file radixSort.h.

References a.

                                                                                  {
     // Overload enabled for unsigned T on accelerators that do not require a single thread per block:
     // run the generic implementation and pass dummyReorder as the post-sort reordering step.
     radixSortImpl<TAcc, T, NS>(acc, a, ind, ind2, size, dummyReorder<TAcc, T>);
   }

◆ radixSort() [2/2]

template<typename TAcc , typename T , int NS = sizeof(T), typename std::enable_if< requires_single_thread_per_block_v< TAcc >, T >::type * = nullptr>

ALPAKA_FN_INLINE void cms::alpakatools::radixSort	(	const TAcc &	acc,
		T const *	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size
	)

Definition at line 357 of file radixSort.h.

References a, and testProducerWithPsetDescEmpty_cfi::i1.

                                                                                                              {
     // Overload for accelerators that require a single thread per block: instead of a radix sort,
     // fill ind[0..size) with 0, 1, 2, ... and stable-sort those indices by the values of a[].
     // Elements with equal keys therefore keep their increasing index order.
     // acc, ind2 and NS are not used by this overload.
     // NOTE(review): indices are stored as uint16_t, so size presumably has to fit that range - confirm with callers.
     static_assert(requires_single_thread_per_block_v<TAcc>, "CPU sort (not a radixSort) called wtth wrong accelerator");
     // Initialize the index array
     std::iota(ind, ind + size, 0);
     /*
     printf("std::stable_sort(a=%p, ind=%p, indmax=%p, size=%d)\n", a, ind, ind + size, size);
     for (uint32_t i=0; i<10 && i<size; i++) {
       printf ("a[%d]=%ld ", i, (long int)a[i]);
     }
     printf("\n");
     for (uint32_t i=0; i<10 && i<size; i++) {
       printf ("ind[%d]=%d ", i, ind[i]);
     }
     printf("\n");
     */
     // Sort the indices, comparing the keys they point to.
     std::stable_sort(ind, ind + size, [a](uint16_t i0, uint16_t i1) { return a[i0] < a[i1]; });
     /*
     for (uint32_t i=0; i<10 && i<size; i++) {
       printf ("ind[%d]=%d ", i, ind[i]);
     }
     printf("\n");
     */
   }

◆ radixSortImpl()

template<typename TAcc , typename T , int NS, typename RF >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSortImpl	(	const TAcc &	acc,
		T const *__restrict__	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size,
		RF	reorder
	)

Definition at line 97 of file radixSort.h.

References a, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::ALPAKA_ASSERT_ACC(), cms::cuda::assert(), cms::cudacompat::atomicAdd(), cms::cudacompat::atomicMax(), newFWLiteAna::bin, HltBtagPostValidation_cff::c, ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), mps_fire::i, heavyIonCSV_trainingSettings::idx, independent_group_elements(), createfilelist::int, dqmiolumiharvest::j, dqmdumpme::k, hltrates_dqm_sourceclient-live_cfg::offset, trackingPlots::reorder, contentValuesCheck::ss, submitPVValidationJobs::t, ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::threadIdxLocal(), dqmMemoryStats::total, and x.

                                                                                                           {
     // Sorts the indices of a[0..size) within one block, processing one 8-bit slice of the key per pass,
     // starting at byte (sizeof(T) - NS) and ending at the most significant byte.
     // ind and ind2 are used alternately as input and output of each pass; after the last pass the result
     // is copied into ind if needed and reorder(acc, a, ind, ind2, size) is invoked.
     // When the accelerator requires a single thread per block the function body is empty.
     if constexpr (!requires_single_thread_per_block_v<TAcc>) {
       const auto warpSize = alpaka::warp::getSize(acc);
       const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
       [[maybe_unused]] const uint32_t blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
       // we expect a power of 2 here
       assert(warpSize && (0 == (warpSize & (warpSize - 1))));
       const std::size_t warpMask = warpSize - 1;
 
       // Define the bin size (d=8 => 1 byte bin).
       constexpr int binBits = 8, dataBits = 8 * sizeof(T), totalSortingPassses = dataBits / binBits;
       // Make sure the slices are data aligned
       static_assert(0 == dataBits % binBits);
       // Make sure the NS parameter makes sense
       // NOTE(review): NS is an int while sizeof(T) is unsigned, so this comparison mixes signedness.
       static_assert(NS > 0 && NS <= sizeof(T));
       constexpr int binsNumber = 1 << binBits;
       constexpr int binsMask = binsNumber - 1;
       // Prefix scan iterations. NS is counted in full bytes and not slices.
       constexpr int initialSortingPass = int(sizeof(T)) - NS;
 
       // Count/index for the prefix scan
       // TODO: rename
       auto& c = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
       // Temporary storage for prefix scan. Only really needed for first-of-warp keeping
       // Then used for thread to bin mapping TODO: change type to byte and remap to
       auto& ct = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
       // Bin to thread index mapping (used to store the highest thread index within a bin number
       // batch of threads.
       // TODO: currently initialized to an invalid value, but could also be initialized to the
       // lowest possible value (change to bytes?)
       auto& cu = alpaka::declareSharedVar<int32_t[binsNumber], __COUNTER__>(acc);
       // TODO we could also have an explicit caching of the current index for each thread.
 
       // TODO: do those have to be shared?
       auto& ibs = alpaka::declareSharedVar<int, __COUNTER__>(acc);
       auto& currentSortingPass = alpaka::declareSharedVar<int, __COUNTER__>(acc);
 
       ALPAKA_ASSERT_ACC(size > 0);
       // TODO: is this a hard requirement?
       ALPAKA_ASSERT_ACC(blockDimension >= binsNumber);
 
       currentSortingPass = initialSortingPass;
 
       // j is the index array read by the current pass, k the one written by it; they are swapped after each pass.
       auto j = ind;
       auto k = ind2;
 
       // Initialize the index order to the trivial increment.
       for (auto idx : independent_group_elements(acc, size)) {
         j[idx] = idx;
       }
       alpaka::syncBlockThreads(acc);
 
       // Iterate on the slices of the data.
       while (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, (currentSortingPass < totalSortingPassses))) {
         // Reset the per-bin counters for this pass.
         for (auto idx : independent_group_elements(acc, binsNumber)) {
           c[idx] = 0;
         }
         alpaka::syncBlockThreads(acc);
         const auto sortingPassShift = binBits * currentSortingPass;
 
         // fill bins (count elements in each bin)
         for (auto idx : independent_group_elements(acc, size)) {
           auto bin = (a[j[idx]] >> sortingPassShift) & binsMask;
           alpaka::atomicAdd(acc, &c[bin], 1, alpaka::hierarchy::Threads{});
         }
         alpaka::syncBlockThreads(acc);
 
         // Sanity check: the bin counts must add up to the number of elements.
         // NOTE(review): as written this runs only on thread 0 of the block whose grid index is 1 - confirm this is intended.
         if (!threadIdxLocal && 1 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]) {
           //          printf("Pass=%d, Block=%d, ", currentSortingPass - 1, alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]);
           size_t total = 0;
           for (int i = 0; i < (int)binsNumber; i++) {
             //            printf("count[%d]=%d ", i, c[i] );
             total += c[i];
           }
           //          printf("total=%zu\n", total);
           assert(total == size);
         }
         // prefix scan "optimized"???...
         // TODO: we might be able to reuse the warpPrefixScan function
         // Warp level prefix scan
         for (auto idx : independent_group_elements(acc, binsNumber)) {
           auto x = c[idx];
           auto laneId = idx & warpMask;
 
           // Accumulate the value held by the lane `offset` positions below, doubling the offset each step.
           for (int offset = 1; offset < warpSize; offset <<= 1) {
             auto y = alpaka::warp::shfl(acc, x, laneId - offset);
             if (laneId >= (uint32_t)offset)
               x += y;
           }
           ct[idx] = x;
         }
         alpaka::syncBlockThreads(acc);
 
         // Block level completion of prefix scan (add last sum of each preceding warp)
         for (auto idx : independent_group_elements(acc, binsNumber)) {
           // ss is the index of the last entry of the warp-sized group that precedes the one containing idx.
           auto ss = (idx / warpSize) * warpSize - 1;
           c[idx] = ct[idx];
           for (int i = ss; i > 0; i -= warpSize)
             c[idx] += ct[i];
         }
         // Post prefix scan, c[bin] contains the offsets in index counts to the last index +1 for each bin
 
         /*
         //prefix scan for the nulls  (for documentation)
         if (threadIdxLocal==0)
           for (int i = 1; i < sb; ++i) c[i] += c[i-1];
         */
 
         // broadcast: we will fill the new index array downward, from offset c[bin], with one thread per
         // bin, working on one set of bin size elements at a time.
         // This will reorder the indices by the currently considered slice, otherwise preserving the previous order.
         ibs = size - 1;
         alpaka::syncBlockThreads(acc);
 
         // Iterate on bin-sized slices to (size - 1) / binSize + 1 iterations
         while (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, ibs >= 0)) {
           // Init
           for (auto idx : independent_group_elements(acc, binsNumber)) {
             cu[idx] = -1;
             ct[idx] = -1;
           }
           alpaka::syncBlockThreads(acc);
 
           // Find the highest index for all the threads dealing with a given bin (in cu[])
           // Also record the bin for each thread (in ct[])
           for (auto idx : independent_group_elements(acc, binsNumber)) {
             int i = ibs - idx;
             int32_t bin = -1;
             if (i >= 0) {
               bin = (a[j[i]] >> sortingPassShift) & binsMask;
               ct[idx] = bin;
               alpaka::atomicMax(acc, &cu[bin], int(i), alpaka::hierarchy::Threads{});
             }
           }
           alpaka::syncBlockThreads(acc);
 
           // FIXME: we can slash a memory access.
           for (auto idx : independent_group_elements(acc, binsNumber)) {
             int i = ibs - idx;
             // Are we still inside the data?
             if (i >= 0) {
               int32_t bin = ct[idx];
               // Are we the thread with the highest index (from previous pass)?
               if (cu[bin] == i) {
                 // With the highest index, we are actually the lowest thread number. We will
                 // work "on behalf of" the higher thread numbers (including ourselves)
                 // No way around scanning and testing for bin in ct[otherThread] number to find the other threads
                 for (int peerThreadIdx = idx; peerThreadIdx < binsNumber; peerThreadIdx++) {
                   if (ct[peerThreadIdx] == bin) {
                     k[--c[bin]] = j[ibs - peerThreadIdx];
                   }
                 }
               }
             }
             /*
             int32_t bin = (i >= 0 ? ((a[j[i]] >> sortingPassShift) & binsMask) : -1);
             if (i >= 0 && i == cu[bin])  // ensure to keep them in order: only one thread per bin is active, rest is idle.
               // 
               for (int ii = idx; ii < sb; ++ii)
                 if (ct[ii] == bin) {
                   auto oi = ii - idx;
                   // assert(i>=oi);if(i>=oi)
                   k[--c[bin]] = j[i - oi]; // i = ibs - idx, oi = ii - idx => i - oi = ibs - ii;
                 }
             */
           }
           alpaka::syncBlockThreads(acc);
 
           // A single thread moves the window down by one batch of binsNumber elements.
           if (threadIdxLocal == 0) {
             ibs -= binsNumber;
             // https://github.com/cms-patatrack/pixeltrack-standalone/pull/210
             // TODO: is this really needed?
             alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
           }
           alpaka::syncBlockThreads(acc);
         }
 
         /*
         // broadcast for the nulls  (for documentation)
         if (threadIdxLocal==0)
         for (int i=size-first-1; i>=0; i--) { // =blockDim.x) {
           auto bin = (a[j[i]] >> d*p)&(sb-1);
           auto ik = atomicSub(&c[bin],1);
           k[ik-1] = j[i];
         }
         */
 
         alpaka::syncBlockThreads(acc);
         // Every element has been placed, so the offset of the first bin must be back to zero.
         ALPAKA_ASSERT_ACC(c[0] == 0);
 
         // swap (local, ok)
         auto t = j;
         j = k;
         k = t;
 
         // NOTE(review): this redeclaration shadows the threadIdxLocal defined at the top of the function with the same value.
         const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
         if (threadIdxLocal == 0)
           ++currentSortingPass;
         alpaka::syncBlockThreads(acc);
       }
 
       if ((dataBits != 8) && (0 == (NS & 1)))
         ALPAKA_ASSERT_ACC(j ==
                           ind);  // dataBits/binBits is even so ind is correct (the result is in the right location)
 
       // TODO this copy is (doubly?) redundant with the reorder
       if (j != ind)  // odd number of sorting passes, we need to move the result to the right array (ind[])
         for (auto idx : independent_group_elements(acc, size)) {
           ind[idx] = ind2[idx];
         };
 
       alpaka::syncBlockThreads(acc);
 
       // now move negative first... (if signed)
       // TODO: the ind2 => ind copy should have been deferred. We should pass (j != ind) as an extra parameter
       reorder(acc, a, ind, ind2, size);
     } else {
       //static_assert(false);
     }
   }

◆ radixSortMulti()

template<typename TAcc , typename T , int NS = sizeof(T)>

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::radixSortMulti	(	const TAcc &	acc,
		T const *	v,
		uint16_t *	index,
		uint32_t const *	offsets,
		uint16_t *	workspace
	)

Definition at line 382 of file radixSort.h.

References a, cms::cuda::assert(), cms::cudacompat::blockIdx, unpackBuffers-CaloStage1::offsets, findQualityFiles::v, and ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ws.

                                                                                                   {
     // TODO: check
     // Sort multiple chunks of data in v[]; chunk number b spans [offsets[b], offsets[b + 1]) and is
     // handled by the block whose index in the grid is b.
     // extern __shared__ uint16_t ws[];
     uint16_t* ws = alpaka::getDynSharedMem<uint16_t>(acc);
 
     const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
     // Keys and output indices of the chunk assigned to this block.
     auto a = v + offsets[blockIdx];
     auto ind = index + offsets[blockIdx];
     // Scratch index array: the dynamic shared memory when no workspace is given, otherwise this chunk's slice of workspace.
     auto ind2 = nullptr == workspace ? ws : workspace + offsets[blockIdx];
     auto size = offsets[blockIdx + 1] - offsets[blockIdx];
     assert(offsets[blockIdx + 1] >= offsets[blockIdx]);
     // Empty chunks are skipped.
     if (size > 0)
       radixSort<TAcc, T, NS>(acc, a, ind, ind2, size);
   }

◆ reorderFloat()

template<typename TAcc , typename T >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::reorderFloat	(	const TAcc &	acc,
		T const *	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size
	)

Definition at line 54 of file radixSort.h.

References a, heavyIonCSV_trainingSettings::idx, and independent_group_elements().

                                                                                  {
     // Post-processing step of the sort: the entries of ind from the first negative key onwards are
     // moved to the front in reversed order, followed by the entries that preceded them (order kept).
     // ind2 is used as scratch space and the result is copied back into ind.
     // NOTE(review): a[ind[0]] and size - 1 are evaluated unconditionally, so size is presumably > 0 - confirm with callers.
     //move negative first...
 
     auto& firstNeg = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
     // Either the very first element is negative, or (until found below) there is no negative element.
     firstNeg = a[ind[0]] < 0 ? 0 : size;
     alpaka::syncBlockThreads(acc);
 
     // find first negative
     for (auto idx : independent_group_elements(acc, size - 1)) {
       // The XOR of two neighbours is negative only when their sign bits differ.
       if ((a[ind[idx]] ^ a[ind[idx + 1]]) < 0)
         firstNeg = idx + 1;
     }
     alpaka::syncBlockThreads(acc);
 
     // Entries [firstNeg, size) go to the front of ind2, in reversed order.
     for (auto idx : independent_group_elements(acc, firstNeg, size)) {
       ind2[size - idx - 1] = ind[idx];
     }
     alpaka::syncBlockThreads(acc);
 
     // Entries [0, firstNeg) follow, in their original order.
     for (auto idx : independent_group_elements(acc, firstNeg)) {
       ind2[idx + size - firstNeg] = ind[idx];
     }
     alpaka::syncBlockThreads(acc);
 
     // Copy the reordered indices back; note that there is no block synchronisation after this loop.
     for (auto idx : independent_group_elements(acc, size)) {
       ind[idx] = ind2[idx];
     }
   }

◆ reorderSigned()

template<typename TAcc , typename T >

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::reorderSigned	(	const TAcc &	acc,
		T const *	a,
		uint16_t *	ind,
		uint16_t *	ind2,
		uint32_t	size
	)

Definition at line 21 of file radixSort.h.

References a, heavyIonCSV_trainingSettings::idx, and independent_group_elements().

                                                                                  {
     // Post-processing step of the sort: the entries of ind from the first negative key onwards are
     // moved to the front (order kept), followed by the entries that preceded them (order kept).
     // ind2 is used as scratch space and the result is copied back into ind.
     // NOTE(review): a[ind[0]] and size - 1 are evaluated unconditionally, so size is presumably > 0 - confirm with callers.
     //move negative first...
 
     auto& firstNeg = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
     // Either the very first element is negative, or (until found below) there is no negative element.
     firstNeg = a[ind[0]] < 0 ? 0 : size;
     alpaka::syncBlockThreads(acc);
 
     // find first negative
     for (auto idx : independent_group_elements(acc, size - 1)) {
       // The XOR of two neighbours is negative only when their sign bits differ.
       if ((a[ind[idx]] ^ a[ind[idx + 1]]) < 0) {
         firstNeg = idx + 1;
       }
     }
 
     alpaka::syncBlockThreads(acc);
 
     // Entries [firstNeg, size) go to the front of ind2, keeping their order.
     for (auto idx : independent_group_elements(acc, firstNeg, size)) {
       ind2[idx - firstNeg] = ind[idx];
     }
     alpaka::syncBlockThreads(acc);
 
     // Entries [0, firstNeg) follow, keeping their order.
     for (auto idx : independent_group_elements(acc, firstNeg)) {
       ind2[idx + size - firstNeg] = ind[idx];
     }
     alpaka::syncBlockThreads(acc);
 
     // Copy the reordered indices back; note that there is no block synchronisation after this loop.
     for (auto idx : independent_group_elements(acc, size)) {
       ind[idx] = ind2[idx];
     }
   }

◆ round_up_by()

constexpr Idx cms::alpakatools::round_up_by	(	Idx	value,
		Idx	divisor
	)

inline

Definition at line 17 of file workdivision.h.

17 { return (value + divisor - 1) / divisor * divisor; }

value

Definition: value.py:1

◆ toBackend()

Backend cms::alpakatools::toBackend ( std::string_view name )

Definition at line 13 of file Backend.cc.

References cms::Exception::addContext(), HLT_2024v14_cff::distance, spr::find(), newFWLiteAna::found, and Skims_PA_cff::name.

                                           {
     // Looks up name in backendNames and returns the Backend whose numeric value is the position of the match.
     // Throws a cms::Exception of category "EnumNotFound" when the name is not listed.
     auto found = std::find(backendNames.begin(), backendNames.end(), name);
     if (found == backendNames.end()) {
       cms::Exception ex("EnumNotFound");
       ex << "Invalid backend name '" << name << "'";
       ex.addContext("Calling cms::alpakatools::toBackend()");
       throw ex;
     }
     // The position of the match in backendNames is the enumerator value.
     return static_cast<Backend>(std::distance(backendNames.begin(), found));
   }

◆ toString()

std::string_view cms::alpakatools::toString ( Backend backend )

Definition at line 24 of file Backend.cc.

References cms::Exception::addContext(), HLT_2024v14_cff::backend, size, and heppy_batch::val.

Referenced by TestAlpakaObjectAnalyzer::analyze(), and TestAlpakaAnalyzer::analyze().

                                             {
     // Returns the entry of backendNames at the numeric value of backend.
     // Throws a cms::Exception of category "InvalidEnumValue" when that value is not below Backend::size.
     auto val = static_cast<unsigned short>(backend);
     if (val >= static_cast<unsigned short>(Backend::size)) {
       cms::Exception ex("InvalidEnumValue");
       ex << "Invalid backend enum value " << val;
       ex.addContext("Calling cms::alpakatools::toString()");
       throw ex;
     }
     return backendNames[val];
   }

◆ uniform_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 311 of file workdivision.h.

References writedatasetfile::args.

                                                                              {
     // One-dimensional convenience wrapper: builds detail::UniformElementsAlong for dimension 0,
     // converting every extra argument to Idx.
     return detail::UniformElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_along	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 325 of file workdivision.h.

References writedatasetfile::args.

                                                                                    {
     // Builds detail::UniformElementsAlong for the dimension given by the Dim template parameter,
     // converting every extra argument to Idx.
     return detail::UniformElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_elements_nd() [1/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_nd ( TAcc const & acc )

inline

Definition at line 575 of file workdivision.h.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_2d::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::FillRhfIndex::operator()().

                                                                  {
     // Builds detail::UniformElementsND from the accelerator alone, without an explicit extent.
     return detail::UniformElementsND<TAcc>(acc);
   }

◆ uniform_elements_nd() [2/2]

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_nd	(	TAcc const &	acc,
		alpaka::Vec< alpaka::Dim< TAcc >, Idx >	extent
	)

inline

Definition at line 580 of file workdivision.h.

                                                                                                          {
     // Builds detail::UniformElementsND from the accelerator and the explicit N-dimensional extent.
     return detail::UniformElementsND<TAcc>(acc, extent);
   }

◆ uniform_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_x	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 337 of file workdivision.h.

References writedatasetfile::args.

                                                                                {
     // "x" maps to the last dimension (index Dim - 1); every extra argument is converted to Idx.
     return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_y	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 344 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::CAFishbone< TrackerTraits >::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels::Kernel_connect< TrackerTraits >::operator()().

                                                                                {
     // "y" maps to the second-to-last dimension (index Dim - 2); every extra argument is converted to Idx.
     return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_elements_z	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 351 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep_pulseMatrices_sameNumberOfSamples::operator()().

                                                                                {
     // "z" maps to the third-to-last dimension (index Dim - 3); every extra argument is converted to Idx.
     return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_group_elements()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 978 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_1d_and_initialize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_makeratio::operator()().

                                                                                    {
     // One-dimensional convenience wrapper: builds detail::UniformGroupElementsAlong for dimension 0,
     // converting every extra argument to Idx.
     return detail::UniformGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_group_elements_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_along	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 992 of file workdivision.h.

References writedatasetfile::args.

                                                                                          {
     // Builds detail::UniformGroupElementsAlong for the dimension given by the Dim template parameter,
     // converting every extra argument to Idx.
     return detail::UniformGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_group_elements_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_x	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1005 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

                                                                                      {
     // "x" maps to the last dimension (index Dim - 1); every extra argument is converted to Idx.
     return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_group_elements_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_y	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1012 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep1d_sameNumberOfSamples::operator()().

                                                                                      {
     // "y" maps to the second-to-last dimension (index Dim - 2); every extra argument is converted to Idx.
     return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_group_elements_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_group_elements_z	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 1019 of file workdivision.h.

References writedatasetfile::args.

                                                                                      {
     // "z" maps to the third-to-last dimension (index Dim - 3); every extra argument is converted to Idx.
     return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_groups()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 759 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_prep_1d_and_initialize::operator()(), and ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit::Kernel_time_compute_makeratio::operator()().

                                                                            {
     // One-dimensional convenience wrapper: builds detail::UniformGroupsAlong for dimension 0,
     // converting every extra argument to Idx.
     return detail::UniformGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_groups_along()

template<typename TAcc , std::size_t Dim, typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_along	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 773 of file workdivision.h.

References writedatasetfile::args.

                                                                                  {
     // Builds detail::UniformGroupsAlong for the dimension given by the Dim template parameter,
     // converting every extra argument to Idx.
     return detail::UniformGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_groups_x()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_x	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 785 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_minimize< NSAMPLES, NPULSES >::operator()().

                                                                              {
     // "x" maps to the last dimension (index Dim - 1); every extra argument is converted to Idx.
     return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_groups_y()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_y	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 792 of file workdivision.h.

References writedatasetfile::args.

Referenced by ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi::Kernel_prep1d_sameNumberOfSamples::operator()().

                                                                              {
     // "y" maps to the second-to-last dimension (index Dim - 2); every extra argument is converted to Idx.
     return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
   }

◆ uniform_groups_z()

template<typename TAcc , typename... TArgs, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>

ALPAKA_FN_ACC auto cms::alpakatools::uniform_groups_z	(	TAcc const &	acc,
		TArgs...	args
	)

inline

Definition at line 799 of file workdivision.h.

References writedatasetfile::args.

                                                                              {
     // "z" maps to the third-to-last dimension (index Dim - 3); every extra argument is converted to Idx.
     return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
   }

◆ warpPrefixScan() [1/2]

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::warpPrefixScan	(	const TAcc &	acc,
		int32_t	laneId,
		T const *	ci,
		T *	co,
		uint32_t	i,
		bool	active = `true`
	)

Definition at line 23 of file prefixScan.h.

References CMS_UNROLL_LOOP, cms::cuda::co, DTskim_cfg::dataType, mps_fire::i, hltrates_dqm_sourceclient-live_cfg::offset, and x.

Referenced by blockPrefixScan(), and warpPrefixScan().

                                                                                            {
     // Running sum across the lanes of a warp: each lane starts from ci[i] and, for offsets 1, 2, 4, ...
     // below the warp size, adds the value held by the lane `offset` positions below it.
     // Inactive lanes contribute 0 and do not write any output.
     // ci and co may be the same
     T x = active ? ci[i] : 0;
     CMS_UNROLL_LOOP
     for (int32_t offset = 1; offset < alpaka::warp::getSize(acc); offset <<= 1) {
       // Force the exact type for integer types otherwise the compiler will find the template resolution ambiguous.
       using dataType = std::conditional_t<std::is_floating_point_v<T>, T, std::int32_t>;
       T y = alpaka::warp::shfl(acc, static_cast<dataType>(x), laneId - offset);
       // Lanes with laneId < offset have no lane `offset` positions below them, so they keep their value.
       if (laneId >= offset)
         x += y;
     }
     if (active)
       co[i] = x;
   }

◆ warpPrefixScan() [2/2]

template<typename TAcc , typename T , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

ALPAKA_FN_ACC ALPAKA_FN_INLINE void cms::alpakatools::warpPrefixScan	(	const TAcc &	acc,
		int32_t	laneId,
		T *	c,
		uint32_t	i,
		bool	active = `true`
	)

Definition at line 40 of file prefixScan.h.

References HltBtagPostValidation_cff::c, mps_fire::i, and warpPrefixScan().

                                                                              {
     // In-place variant: the same array c is used both as input and as output of the scan.
     warpPrefixScan(acc, laneId, c, c, i, active);
   }

Variable Documentation

◆ allocator_policy

template<typename TDev , typename = std::enable_if_t<alpaka::isDevice<TDev>>>

constexpr AllocatorPolicy cms::alpakatools::allocator_policy = AllocatorPolicy::Synchronous

inline

Definition at line 17 of file AllocatorPolicy.h.

Referenced by make_device_buffer(), and make_host_buffer().

◆ requires_single_thread_per_block_v

template<typename TAcc , typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>

constexpr bool cms::alpakatools::requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value

inline

Definition at line 43 of file workdivision.h.

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Typedef Documentation

◆ const_device_buffer

◆ const_host_buffer

◆ device_buffer

◆ device_view

◆ host_buffer

◆ host_view

Enumeration Type Documentation

◆ AllocatorPolicy

◆ Backend

Function Documentation

◆ allocCachedBuf()

◆ blockPrefixScan() [1/2]

◆ blockPrefixScan() [2/2]

◆ chooseDevice()

◆ devices()

◆ divide_up_by()

◆ dummyReorder()

◆ fillManyFromVector() [1/2]

◆ fillManyFromVector() [2/2]

◆ forEachInBins()

◆ forEachInWindow()

◆ getDeviceCachingAllocator()

◆ getEventCache()

◆ getHostCachingAllocator()

◆ getQueueCache()

◆ host()

◆ host_platform()

◆ independent_group_elements()

◆ independent_group_elements_along()

◆ independent_group_elements_x()

◆ independent_group_elements_y()

◆ independent_group_elements_z()

◆ independent_groups()

◆ independent_groups_along()

◆ independent_groups_x()

◆ independent_groups_y()

◆ independent_groups_z()

◆ isPowerOf2()

◆ make_device_buffer() [1/6]

◆ make_device_buffer() [2/6]

◆ make_device_buffer() [3/6]

◆ make_device_buffer() [4/6]

◆ make_device_buffer() [5/6]

◆ make_device_buffer() [6/6]

◆ make_device_view() [1/4]

◆ make_device_view() [2/4]

◆ make_device_view() [3/4]

◆ make_device_view() [4/4]

◆ make_host_buffer() [1/9]

◆ make_host_buffer() [2/9]

◆ make_host_buffer() [3/9]

◆ make_host_buffer() [4/9]

◆ make_host_buffer() [5/9]

◆ make_host_buffer() [6/9]

◆ make_host_buffer() [7/9]

◆ make_host_buffer() [8/9]

◆ make_host_buffer() [9/9]

◆ make_host_view() [1/4]

◆ make_host_view() [2/4]

◆ make_host_view() [3/4]

◆ make_host_view() [4/4]

◆ make_SimpleVector() [1/2]

◆ make_SimpleVector() [2/2]

◆ make_workdiv() [1/2]

◆ make_workdiv() [2/2]

◆ module_backend_config()

◆ once_per_block()

◆ once_per_grid()

◆ platform()

◆ radixSort() [1/2]

◆ radixSort() [2/2]

◆ radixSortImpl()

◆ radixSortMulti()