d9/d97/workdivision_8h_source.html

 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h
 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h

 #include <algorithm>
 #include <cstddef>
 #include <type_traits>

 #include <alpaka/alpaka.hpp>

 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"

 namespace cms::alpakatools {

   using namespace alpaka_common;

   // Round "value" up to the next multiple of "divisor"; a value that is already a multiple is returned unchanged.
   // The result is derived from the remainder rather than from "value + divisor - 1", so that the intermediate
   // sum cannot wrap around when "value" is close to the largest representable Idx.
   // "divisor" must not be zero.
   inline constexpr Idx round_up_by(Idx value, Idx divisor) {
     const Idx remainder = value % divisor;
     return remainder == 0 ? value : value + (divisor - remainder);
   }

   // Return "value" divided by "divisor", rounded up to the next integer.
   // The quotient is computed first and incremented only if there is a remainder; unlike
   // "(value + divisor - 1) / divisor" this cannot wrap around when "value" is close to the largest representable Idx.
   // "divisor" must not be zero.
   inline constexpr Idx divide_up_by(Idx value, Idx divisor) {
     const Idx quotient = value / divisor;
     return (value % divisor == 0) ? quotient : quotient + Idx{1};
   }

   // Type trait selecting how the work is divided for the accelerator TAcc: when it is true the work is organised
   // with a single thread per block that loops over the elements of the block; when it is false each thread
   // handles a single element. This primary template makes "true" the default for every accelerator that does
   // not have a dedicated specialisation.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   struct requires_single_thread_per_block : std::bool_constant<true> {};

 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
   // The CUDA accelerator does not require a single thread per block, for any dimensionality
   template <typename TDim>
   struct requires_single_thread_per_block<alpaka::AccGpuCudaRt<TDim, Idx>> : std::bool_constant<false> {};
 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED

 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
   // The HIP accelerator does not require a single thread per block, for any dimensionality
   template <typename TDim>
   struct requires_single_thread_per_block<alpaka::AccGpuHipRt<TDim, Idx>> : std::bool_constant<false> {};
 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED

   // Shorthand for requires_single_thread_per_block<TAcc>::value: true if kernels on the accelerator TAcc should
   // run with a single thread per block, false if they should run with one thread per element
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   inline constexpr bool requires_single_thread_per_block_v{requires_single_thread_per_block<TAcc>::value};

   // Build the work division for a 1-dimensional kernel running on the accelerator TAcc.
   //   blocks    number of blocks in the grid
   //   elements  number of elements handled by each block
   // Depending on requires_single_thread_per_block_v<TAcc>, the elements of a block are either all given to a
   // single thread, or spread over as many threads with one element each.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
   inline WorkDiv<Dim1D> make_workdiv(Idx blocks, Idx elements) {
     if constexpr (requires_single_thread_per_block_v<TAcc>) {
       // serial execution inside each block:
       //   - one thread per block;
       //   - "elements" elements per thread.
       return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
     } else {
       // one thread for each element:
       //   - "elements" threads per block;
       //   - one element per thread.
       return WorkDiv<Dim1D>(blocks, elements, Idx{1});
     }
   }

   // Build the work division for an N-dimensional kernel running on the accelerator TAcc, where N is the
   // dimensionality of TAcc.
   //   blocks    number of blocks in the grid, along each dimension
   //   elements  number of elements handled by each block, along each dimension
   // Depending on requires_single_thread_per_block_v<TAcc>, the elements of a block are either all given to a
   // single thread, or spread over as many threads with one element each.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   inline WorkDiv<alpaka::Dim<TAcc>> make_workdiv(const Vec<alpaka::Dim<TAcc>>& blocks,
                                                  const Vec<alpaka::Dim<TAcc>>& elements) {
     using Dim = alpaka::Dim<TAcc>;
     // vector with all components equal to 1
     const Vec<Dim> unit = Vec<Dim>::ones();
     if constexpr (requires_single_thread_per_block_v<TAcc>) {
       // serial execution inside each block:
       //   - one thread per block;
       //   - "elements" elements per thread.
       return WorkDiv<Dim>(blocks, unit, elements);
     } else {
       // one thread for each element:
       //   - "elements" threads per block;
       //   - one element per thread.
       return WorkDiv<Dim>(blocks, elements, unit);
     }
   }

   // Range over the indices that the calling thread has to process in a 1-dimensional kernel; it provides begin()
   // and end(), so it can be used in a range-based for loop inside the kernel.
   //
   // The thread starts from (index of the thread in the grid) * (elements per thread) and visits up to
   // "elements per thread" consecutive indices; it then moves forward by
   // (number of threads in the grid) * (elements per thread) and repeats, until the extent is reached.
   // All the indices returned are strictly smaller than the extent.
   //
   // When requires_single_thread_per_block_v<TAcc> is false the iterator does not loop over the elements of the
   // thread and moves by a full stride at every increment, i.e. it expects a single element per thread.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
   class elements_with_stride {
   public:
     // Loop over a single stride: the extent is the number of threads in the grid times the elements per thread
     ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc)
         : elements_with_stride(acc,
                                alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] *
                                    alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]) {}

     // Loop over the indices in the range [0, extent)
     ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc, Idx extent)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
           first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           extent_{extent} {}

     class iterator {
       friend class elements_with_stride;

       // Private constructor, used only by elements_with_stride::begin() and end().
       // The starting position and the end of the first group of elements are clamped to the extent.
       ALPAKA_FN_ACC inline iterator(Idx elements, Idx stride, Idx extent, Idx first)
           : elements_{elements},
             stride_{stride},
             extent_{extent},
             first_{std::min(first, extent)},
             index_{first_},
             last_{std::min(first + elements, extent)} {}

     public:
       // index of the current element
       ALPAKA_FN_ACC inline Idx operator*() const { return index_; }

       // pre-increment: move to the next element to be processed by this thread
       ALPAKA_FN_ACC inline iterator& operator++() {
         if constexpr (requires_single_thread_per_block_v<TAcc>) {
           // stay within the current group of elements, as long as there are any left
           if (++index_ < last_) {
             return *this;
           }
         }

         // the current group is exhausted: jump to the next one, one stride ahead
         first_ += stride_;
         if (first_ < extent_) {
           index_ = first_;
           last_ = std::min(first_ + elements_, extent_);
         } else {
           // reached or passed the end of the extent: clamp everything to it, to compare equal to end()
           first_ = extent_;
           index_ = extent_;
           last_ = extent_;
         }
         return *this;
       }

       // post-increment: move to the next element, and return the iterator as it was before the increment
       ALPAKA_FN_ACC inline iterator operator++(int) {
         iterator previous{*this};
         operator++();
         return previous;
       }

       // two iterators are equal if they point to the same element within the same group
       ALPAKA_FN_ACC inline bool operator==(iterator const& other) const {
         return (first_ == other.first_) and (index_ == other.index_);
       }

       ALPAKA_FN_ACC inline bool operator!=(iterator const& other) const {
         return (index_ != other.index_) or (first_ != other.first_);
       }

     private:
       // configuration of the loop; kept non-const so that the iterator can be copied and assigned
       Idx elements_;  // number of elements per thread
       Idx stride_;    // distance between two consecutive groups of elements
       Idx extent_;    // end of the range
       // state of the loop, updated by the increment operators
       Idx first_;  // first element of the current group
       Idx index_;  // current element
       Idx last_;   // one past the last element of the current group
     };

     // iterator to the first element to be processed by this thread
     ALPAKA_FN_ACC inline iterator begin() const { return iterator{elements_, stride_, extent_, first_}; }

     // iterator past the end of the extent
     ALPAKA_FN_ACC inline iterator end() const { return iterator{elements_, stride_, extent_, extent_}; }

   private:
     const Idx elements_;  // number of elements per thread
     const Idx first_;     // first element to be processed by this thread
     const Idx stride_;    // number of threads in the grid times the number of elements per thread
     const Idx extent_;    // end of the range
   };

   // Range over the N-dimensional indices that the calling thread has to process in an N-dimensional kernel; it
   // provides begin() and end(), so it can be used in a range-based for loop inside the kernel.
   //
   // Along each dimension the thread starts from (index of the thread in the grid) * (elements per thread), visits
   // up to "elements per thread" consecutive indices, then moves forward by
   // (number of threads in the grid) * (elements per thread) and repeats, until the extent is reached.
   // The last dimension is the one that advances fastest.
   //
   // A thread whose starting index is outside of the extent along any dimension (including the case of an extent
   // equal to zero along that dimension) has nothing to process: for it begin() is equal to end().
   //
   // When requires_single_thread_per_block_v<TAcc> is false the iterator does not loop over the elements of the
   // thread and moves by a full stride at every increment, i.e. it expects a single element per thread.
   //
   // The iterators keep a pointer to the elements_with_stride_nd object they were obtained from, and must not
   // outlive it.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
   class elements_with_stride_nd {
   public:
     using Dim = alpaka::Dim<TAcc>;
     using Vec = alpaka::Vec<Dim, Idx>;

     // Loop over a single stride: along each dimension the extent is the number of threads in the grid times the
     // number of elements per thread
     ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
           first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           extent_{stride_} {}

     // Loop over the indices in the range [0, extent) along each dimension
     ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc, Vec extent)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
           first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           extent_{extent} {}

     class iterator {
       friend class elements_with_stride_nd;

     public:
       // N-dimensional index of the current element
       ALPAKA_FN_ACC inline Vec operator*() const { return index_; }

       // pre-increment the iterator, and return a reference to it
       ALPAKA_FN_ACC constexpr inline iterator& operator++() {
         increment();
         return *this;
       }

       // post-increment the iterator, and return a copy of it as it was before the increment
       ALPAKA_FN_ACC constexpr inline iterator operator++(int) {
         iterator old = *this;
         increment();
         return old;
       }

       // two iterators are equal if they point to the same N-dimensional index
       ALPAKA_FN_ACC constexpr inline bool operator==(iterator const& other) const { return (index_ == other.index_); }

       ALPAKA_FN_ACC constexpr inline bool operator!=(iterator const& other) const { return not(*this == other); }

     private:
       // private, explicit constructor, used only by elements_with_stride_nd::begin() and end();
       // the starting position and the end of the first group of elements are clamped to the extent
       ALPAKA_FN_ACC inline iterator(elements_with_stride_nd const* loop, Vec first)
           : loop_{loop},
             thread_{alpaka::elementwise_min(first, loop->extent_)},
             range_{alpaka::elementwise_min(first + loop->elements_, loop->extent_)},
             index_{thread_} {}

       // advance along dimension I within the current group of elements;
       // if the end of the group is reached, go back to its beginning and return true to signal the overflow
       template <size_t I>
       ALPAKA_FN_ACC inline constexpr bool nth_elements_loop() {
         bool overflow = false;
         ++index_[I];
         if (index_[I] >= range_[I]) {
           index_[I] = thread_[I];
           overflow = true;
         }
         return overflow;
       }

       // advance within the current group of elements, starting from dimension N-1 and carrying any overflow
       // over to the previous dimension; return true only if all N dimensions have overflowed
       template <size_t N>
       ALPAKA_FN_ACC inline constexpr bool do_elements_loops() {
         if constexpr (N == 0) {
           // overflow
           return true;
         } else {
           if (not nth_elements_loop<N - 1>()) {
             return false;
           } else {
             return do_elements_loops<N - 1>();
           }
         }
       }

       // move to the next group of elements along dimension I, one stride ahead;
       // if the end of the extent is reached, go back to the first group and return true to signal the overflow
       template <size_t I>
       ALPAKA_FN_ACC inline constexpr bool nth_strided_loop() {
         bool overflow = false;
         thread_[I] += loop_->stride_[I];
         if (thread_[I] >= loop_->extent_[I]) {
           thread_[I] = loop_->first_[I];
           overflow = true;
         }
         index_[I] = thread_[I];
         range_[I] = std::min(thread_[I] + loop_->elements_[I], loop_->extent_[I]);
         return overflow;
       }

       // move to the next group of elements, starting from dimension N-1 and carrying any overflow over to the
       // previous dimension; return true only if all N dimensions have overflowed
       template <size_t N>
       ALPAKA_FN_ACC inline constexpr bool do_strided_loops() {
         if constexpr (N == 0) {
           // overflow
           return true;
         } else {
           if (not nth_strided_loop<N - 1>()) {
             return false;
           } else {
             return do_strided_loops<N - 1>();
           }
         }
       }

       // increment the iterator
       ALPAKA_FN_ACC inline constexpr void increment() {
         if constexpr (requires_single_thread_per_block_v<TAcc>) {
           // linear N-dimensional loops over the elements associated to the thread;
           // do_elements_loops<>() returns true if all of those loops overflow
           if (not do_elements_loops<Dim::value>()) {
             // the elements loops did not overflow, return the next index
             return;
           }
         }

         // strided N-dimensional loop over the threads in the kernel launch grid;
         // do_strided_loops<>() returns true if all of those loops overflow
         if (not do_strided_loops<Dim::value>()) {
           // the strided loops did not overflow, return the next index
           return;
         }

         // the iterator has reached or passed the end of the extent, clamp it to the extent
         thread_ = loop_->extent_;
         range_ = loop_->extent_;
         index_ = loop_->extent_;
       }

       // const pointer to the elements_with_stride_nd that the iterator refers to
       const elements_with_stride_nd* loop_;

       // modified by the pre/post-increment operator
       Vec thread_;  // first element of the current group processed by this thread
       Vec range_;   // one past the last element of the current group processed by this thread
       Vec index_;   // current element processed by this thread
     };

     // Iterator to the first element to be processed by this thread, or end() if there is none
     ALPAKA_FN_ACC inline iterator begin() const {
       // If the first element is outside of the extent along any dimension the thread has nothing to process.
       // Clamping the starting index to the extent is not enough in this case: the resulting index would differ
       // from end() along the other dimensions, and the loop would run over out-of-range indices.
       for (size_t dim = 0; dim < Dim::value; ++dim) {
         if (first_[dim] >= extent_[dim]) {
           return end();
         }
       }
       return iterator{this, first_};
     }

     // Iterator past the end of the extent
     ALPAKA_FN_ACC inline iterator end() const { return iterator{this, extent_}; }

   private:
     const Vec elements_;  // number of elements per thread
     const Vec first_;     // first element to be processed by this thread
     const Vec stride_;    // number of threads in the grid times the number of elements per thread
     const Vec extent_;    // end of the range
   };

 }  // namespace cms::alpakatools

 #endif  // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
cms::alpakatools::elements_with_stride_nd::iterator::nth_strided_loop
ALPAKA_FN_ACC constexpr bool nth_strided_loop()
Definition: workdivision.h:235

cms::alpakatools::elements_with_stride_nd::elements_with_stride_nd
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc, Vec extent)
Definition: workdivision.h:172

cms::alpakatools::elements_with_stride_nd::iterator::operator==
ALPAKA_FN_ACC constexpr bool operator==(iterator const &other) const
Definition: workdivision.h:197

cms::alpakatools::elements_with_stride::iterator::index_
Idx index_
Definition: workdivision.h:145

cms::alpakatools::elements_with_stride_nd::iterator::increment
ALPAKA_FN_ACC constexpr void increment()
Definition: workdivision.h:262

heppy_loop.loop
loop
Definition: heppy_loop.py:28

cms::alpakatools::elements_with_stride::first_
const Idx first_
Definition: workdivision.h:155

cms::alpakatools::elements_with_stride::elements_with_stride
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc)
Definition: workdivision.h:76

cms::alpakatools::elements_with_stride::iterator::last_
Idx last_
Definition: workdivision.h:146

cms::alpakatools::make_workdiv
WorkDiv< Dim1D > make_workdiv(Idx blocks, Idx elements)
Definition: workdivision.h:41

cms::alpakatools::divide_up_by
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:19

cms::alpakatools::elements_with_stride::elements_with_stride
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc, Idx extent)
Definition: workdivision.h:82

cms::alpakatools::elements_with_stride::iterator::elements_
Idx elements_
Definition: workdivision.h:140

cms::alpakatools::elements_with_stride_nd::iterator
Definition: workdivision.h:178

alpaka_common::Idx
uint32_t Idx
Definition: config.h:13

cms::alpakatools::elements_with_stride_nd
Definition: workdivision.h:161

cms::alpakatools::elements_with_stride_nd::elements_
const Vec elements_
Definition: workdivision.h:299

cms::alpakatools::elements_with_stride_nd::iterator::thread_
Vec thread_
Definition: workdivision.h:289

cms::alpakatools::elements_with_stride_nd::iterator::loop_
const elements_with_stride_nd * loop_
Definition: workdivision.h:286

cms::alpakatools::elements_with_stride::begin
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:149

cms::alpakatools::elements_with_stride::iterator::stride_
Idx stride_
Definition: workdivision.h:141

cms::alpakatools::elements_with_stride::iterator
Definition: workdivision.h:88

alpaka_common
Definition: config.h:10

cms::alpakatools::elements_with_stride_nd::iterator::operator*
ALPAKA_FN_ACC Vec operator*() const
Definition: workdivision.h:182

cms::alpakatools
Definition: PortableCollection.h:44

cms::alpakatools::elements_with_stride::iterator::first_
Idx first_
Definition: workdivision.h:144

cms::alpakatools::elements_with_stride::iterator::extent_
Idx extent_
Definition: workdivision.h:142

cms::alpakatools::elements_with_stride_nd::elements_with_stride_nd
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc)
Definition: workdivision.h:166

gather_cfg.blocks
blocks
Definition: gather_cfg.py:90

alpaka_common::WorkDiv
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
Definition: config.h:30

cms::alpakatools::elements_with_stride_nd::iterator::nth_elements_loop
ALPAKA_FN_ACC constexpr bool nth_elements_loop()
Definition: workdivision.h:210

cms::alpakatools::elements_with_stride::stride_
const Idx stride_
Definition: workdivision.h:156

cms::alpakatools::elements_with_stride::iterator::operator*
ALPAKA_FN_ACC Idx operator*() const
Definition: workdivision.h:100

trackingPlots.other
other
Definition: trackingPlots.py:1464

cms::alpakatools::elements_with_stride::end
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:151

cms::alpakatools::round_up_by
constexpr Idx round_up_by(Idx value, Idx divisor)
Definition: workdivision.h:16

cms::alpakatools::elements_with_stride::iterator::iterator
ALPAKA_FN_ACC iterator(Idx elements, Idx stride, Idx extent, Idx first)
Definition: workdivision.h:91

SiStripPI::min
Definition: SiStripPayloadInspectorHelper.h:178

Exhume::I
const std::complex< double > I
Definition: I.h:8

alpaka
Definition: HostOnlyTask.h:9

cms::alpakatools::elements_with_stride::extent_
const Idx extent_
Definition: workdivision.h:157

value
Definition: value.py:1

cms::alpakatools::elements_with_stride_nd::end
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:296

cms::alpakatools::elements_with_stride_nd::iterator::operator++
ALPAKA_FN_ACC constexpr iterator operator++()
Definition: workdivision.h:185

traits.h

cms::alpakatools::elements_with_stride_nd::Vec
alpaka::Vec< Dim, Idx > Vec
Definition: workdivision.h:164

cms::alpakatools::elements_with_stride::iterator::operator++
ALPAKA_FN_ACC iterator operator++(int)
Definition: workdivision.h:126

cms::alpakatools::elements_with_stride
Definition: workdivision.h:74

N
#define N
Definition: blowfish.cc:9

cms::alpakatools::elements_with_stride_nd::first_
const Vec first_
Definition: workdivision.h:300

cms::alpakatools::requires_single_thread_per_block
Definition: workdivision.h:23

alpaka_common::Vec
alpaka::Vec< TDim, Idx > Vec
Definition: config.h:23

cms::alpakatools::elements_with_stride_nd::extent_
const Vec extent_
Definition: workdivision.h:302

cms::alpakatools::elements_with_stride_nd::iterator::do_elements_loops
ALPAKA_FN_ACC constexpr bool do_elements_loops()
Definition: workdivision.h:221

cms::alpakatools::elements_with_stride_nd::Dim
alpaka::Dim< TAcc > Dim
Definition: workdivision.h:163

cms::alpakatools::elements_with_stride_nd::iterator::range_
Vec range_
Definition: workdivision.h:290

dqmdumpme.first
first
Definition: dqmdumpme.py:55

cms::alpakatools::elements_with_stride::iterator::operator==
ALPAKA_FN_ACC bool operator==(iterator const &other) const
Definition: workdivision.h:132

cms::alpakatools::elements_with_stride_nd::begin
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:294

cms::alpakatools::elements_with_stride_nd::stride_
const Vec stride_
Definition: workdivision.h:301

cms::alpakatools::elements_with_stride::iterator::operator++
ALPAKA_FN_ACC iterator & operator++()
Definition: workdivision.h:103

cms::alpakatools::elements_with_stride_nd::iterator::operator++
ALPAKA_FN_ACC constexpr iterator operator++(int)
Definition: workdivision.h:191

config.h

gpuPixelDoublets::stride
auto stride
Definition: gpuPixelDoubletsAlgos.h:141

cms::alpakatools::requires_single_thread_per_block_v
constexpr bool requires_single_thread_per_block_v
Definition: workdivision.h:37

cms::alpakatools::elements_with_stride_nd::iterator::index_
Vec index_
Definition: workdivision.h:291

cms::alpakatools::elements_with_stride_nd::iterator::iterator
ALPAKA_FN_ACC iterator(elements_with_stride_nd const *loop, Vec first)
Definition: workdivision.h:203

bookConverter.elements
elements
Definition: bookConverter.py:147

cms::alpakatools::elements_with_stride_nd::iterator::do_strided_loops
ALPAKA_FN_ACC constexpr bool do_strided_loops()
Definition: workdivision.h:248

cms::alpakatools::elements_with_stride_nd::iterator::operator!=
ALPAKA_FN_ACC constexpr bool operator!=(iterator const &other) const
Definition: workdivision.h:199

cms::alpakatools::elements_with_stride::iterator::operator!=
ALPAKA_FN_ACC bool operator!=(iterator const &other) const
Definition: workdivision.h:136

cms::alpakatools::elements_with_stride::elements_
const Idx elements_
Definition: workdivision.h:154