d9/d97/workdivision_8h_source.html

 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h
 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h

 #include <type_traits>

 #include <alpaka/alpaka.hpp>

 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/traits.h"

 namespace cms::alpakatools {

   using namespace alpaka_common;

   // Round "value" up to the nearest multiple of "divisor"; a value that is already a multiple is returned unchanged.
   // The result is computed from the remainder instead of "(value + divisor - 1) / divisor * divisor", because the
   // intermediate sum wraps around for values close to the maximum of Idx and would yield a wrong (too small) result.
   // The result can still wrap around if the rounded-up value itself does not fit in Idx.
   // The divisor must not be zero.
   inline constexpr Idx round_up_by(Idx value, Idx divisor) {
     const Idx remainder = value % divisor;
     return remainder == 0 ? value : value + (divisor - remainder);
   }

   // Return the integer division of "value" by "divisor", rounded up to the next integer.
   // The quotient is corrected by the remainder test instead of computing "(value + divisor - 1) / divisor", because
   // the intermediate sum wraps around for values close to the maximum of Idx; this form cannot overflow.
   // The divisor must not be zero.
   inline constexpr Idx divide_up_by(Idx value, Idx divisor) {
     return value / divisor + (value % divisor != 0 ? Idx{1} : Idx{0});
   }

   // Type trait telling whether or not the accelerator expects the threads-per-block and elements-per-thread
   // to be swapped, i.e. whether it should be launched with a single thread per block.
   // The primary template answers "true"; its defaulted second parameter is only well-formed when
   // alpaka::isAccelerator<TAcc> is true.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   struct requires_single_thread_per_block : std::bool_constant<true> {};

 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
   // The CUDA accelerator is not restricted to a single thread per block
   template <typename TDim>
   struct requires_single_thread_per_block<alpaka::AccGpuCudaRt<TDim, Idx>> : std::bool_constant<false> {};
 #endif  // ALPAKA_ACC_GPU_CUDA_ENABLED

 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
   // The HIP accelerator is not restricted to a single thread per block
   template <typename TDim>
   struct requires_single_thread_per_block<alpaka::AccGpuHipRt<TDim, Idx>> : std::bool_constant<false> {};
 #endif  // ALPAKA_ACC_GPU_HIP_ENABLED

 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
   // The CPU accelerator with multiple threads per block is not restricted to a single thread per block
   template <typename TDim>
   struct requires_single_thread_per_block<alpaka::AccCpuThreads<TDim, Idx>> : std::bool_constant<false> {};
 #endif  // ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED

   // Convenience variable template holding the value of requires_single_thread_per_block<TAcc>: whether or not
   // the accelerator expects the threads-per-block and elements-per-thread to be swapped.
   // The defaulted second template parameter is only well-formed when alpaka::isAccelerator<TAcc> is true.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   inline constexpr bool requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value;

   // Build the work division of a 1-dimensional kernel for the accelerator TAcc.
   //   blocks    number of blocks in the grid
   //   elements  number of elements handled by each block; depending on the accelerator this becomes either
   //             the number of threads per block or the number of elements per thread
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
   inline WorkDiv<Dim1D> make_workdiv(Idx blocks, Idx elements) {
     if constexpr (requires_single_thread_per_block_v<TAcc>) {
       // Backends restricted to a single thread per block run the elements serially:
       //   - threads per block:   always 1;
       //   - elements per thread: "elements".
       return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
     } else {
       // Backends with many threads per block (the GPU backends and the CPU threads backend)
       // let each thread look at a single element:
       //   - threads per block:   "elements";
       //   - elements per thread: always 1.
       return WorkDiv<Dim1D>(blocks, elements, Idx{1});
     }
   }

   // Build the work division of an N-dimensional kernel for the accelerator TAcc, where N is alpaka::Dim<TAcc>.
   //   blocks    number of blocks in the grid, along each dimension
   //   elements  number of elements handled by each block, along each dimension; depending on the accelerator
   //             this becomes either the number of threads per block or the number of elements per thread
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
   inline WorkDiv<alpaka::Dim<TAcc>> make_workdiv(const Vec<alpaka::Dim<TAcc>>& blocks,
                                                  const Vec<alpaka::Dim<TAcc>>& elements) {
     using Dimension = alpaka::Dim<TAcc>;
     if constexpr (requires_single_thread_per_block_v<TAcc>) {
       // Backends restricted to a single thread per block run the elements serially:
       //   - threads per block:   always 1 along each dimension;
       //   - elements per thread: "elements".
       return WorkDiv<Dimension>(blocks, Vec<Dimension>::ones(), elements);
     } else {
       // Backends with many threads per block (the GPU backends and the CPU threads backend)
       // let each thread look at a single element:
       //   - threads per block:   "elements";
       //   - elements per thread: always 1 along each dimension.
       return WorkDiv<Dimension>(blocks, elements, Vec<Dimension>::ones());
     }
   }

   // Range over the 1-dimensional element indices assigned to the calling thread, usable in a range-based for loop.
   //
   // The thread starts from its own first element (its index in the grid times the number of elements per thread)
   // and advances by the number of elements covered by the whole grid (threads in the grid times elements per
   // thread) until the extent is reached. When requires_single_thread_per_block_v<TAcc> is true the iterator also
   // walks over the consecutive elements of each group before moving to the next stride.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
   class elements_with_stride {
   public:
     // Loop with an extent equal to the stride, i.e. the number of elements covered by one pass of the grid.
     ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
           thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           extent_{stride_} {}

     // Loop over the indices in [0, extent).
     ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc, Idx extent)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]},
           thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0u] * elements_},
           extent_{extent} {}

     // Forward iterator over the element indices; it can only be created by elements_with_stride.
     class iterator {
       friend class elements_with_stride;

       // Start at "first", clamped to the extent; the current group of elements ends at first + elements,
       // clamped to the extent as well.
       ALPAKA_FN_ACC inline iterator(Idx elements, Idx stride, Idx extent, Idx first)
           : elements_{elements},
             stride_{stride},
             extent_{extent},
             first_{std::min(first, extent)},
             index_{first_},
             range_{std::min(first + elements, extent)} {}

     public:
       // index of the current element
       ALPAKA_FN_ACC inline Idx operator*() const { return index_; }

       // pre-increment: move to the next element and return this iterator
       ALPAKA_FN_ACC inline iterator& operator++() {
         if constexpr (requires_single_thread_per_block_v<TAcc>) {
           // first try the next element inside the group handled by the current thread
           if (++index_ < range_) {
             return *this;
           }
         }

         // the group is exhausted: jump ahead by the grid stride
         first_ += stride_;
         if (first_ < extent_) {
           // still inside the extent, start the next group of elements
           index_ = first_;
           range_ = std::min(first_ + elements_, extent_);
         } else {
           // reached or passed the end of the extent: clamp everything to it, so as to compare equal to end()
           first_ = index_ = range_ = extent_;
         }
         return *this;
       }

       // post-increment: move to the next element and return a copy of the previous state
       ALPAKA_FN_ACC inline iterator operator++(int) {
         iterator previous{*this};
         operator++();
         return previous;
       }

       // two iterators are equal when they are at the same element of the same group
       ALPAKA_FN_ACC inline bool operator==(iterator const& other) const {
         return (first_ == other.first_) and (index_ == other.index_);
       }

       ALPAKA_FN_ACC inline bool operator!=(iterator const& other) const {
         return (index_ != other.index_) or (first_ != other.first_);
       }

     private:
       // loop parameters; kept non-const so that the iterator can be copied and assigned
       Idx elements_;
       Idx stride_;
       Idx extent_;
       // iteration state, updated by the increment operators
       Idx first_;  // first element of the current group
       Idx index_;  // current element
       Idx range_;  // one past the last element of the current group
     };

     // iterator at the first element assigned to the current thread
     ALPAKA_FN_ACC inline iterator begin() const { return iterator{elements_, stride_, extent_, thread_}; }

     // iterator at the end of the extent
     ALPAKA_FN_ACC inline iterator end() const { return iterator{elements_, stride_, extent_, extent_}; }

   private:
     const Idx elements_;  // number of elements per thread
     const Idx thread_;    // first element assigned to the current thread
     const Idx stride_;    // number of elements covered by one pass of the whole grid
     const Idx extent_;    // upper bound (exclusive) of the indices to loop over
   };

   // Range over the N-dimensional element indices assigned to the calling thread, usable in a range-based for loop;
   // N is the dimensionality of the accelerator TAcc and must be greater than 0.
   //
   // Along each dimension the thread starts from its own first element (its index in the grid times the number of
   // elements per thread) and advances by the number of elements covered by the whole grid (threads in the grid
   // times elements per thread) while it stays inside the extent. When requires_single_thread_per_block_v<TAcc> is
   // true the iterator also walks over the consecutive elements of each group before moving to the next stride.
   // The last dimension is the fastest-varying one.
   template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
   class elements_with_stride_nd {
   public:
     using Dim = alpaka::Dim<TAcc>;
     using Vec = alpaka::Vec<Dim, Idx>;

     // Loop with an extent equal to the stride, i.e. the number of elements covered by one pass of the grid.
     ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
           thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           extent_{stride_} {}

     // Loop over the indices that are smaller than "extent" along every dimension.
     ALPAKA_FN_ACC inline elements_with_stride_nd(TAcc const& acc, Vec extent)
         : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
           thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
           extent_{extent} {}

     // tag used to construct an end iterator
     struct at_end_t {};

     // Forward iterator over the N-dimensional indices; it can only be created by elements_with_stride_nd and
     // keeps a pointer to it, so it must not outlive the elements_with_stride_nd object it was obtained from.
     class iterator {
       friend class elements_with_stride_nd;

     public:
       // N-dimensional index of the current element
       ALPAKA_FN_ACC inline Vec operator*() const { return index_; }

       // pre-increment the iterator; returns a reference to this iterator, consistently with
       // elements_with_stride::iterator, so that no copy is made and chained increments act on the same object
       ALPAKA_FN_ACC constexpr inline iterator& operator++() {
         increment();
         return *this;
       }

       // post-increment the iterator; returns a copy of the iterator as it was before the increment
       ALPAKA_FN_ACC constexpr inline iterator operator++(int) {
         iterator old = *this;
         increment();
         return old;
       }

       // two iterators are equal when they point to the same N-dimensional index
       ALPAKA_FN_ACC constexpr inline bool operator==(iterator const& other) const { return (index_ == other.index_); }

       ALPAKA_FN_ACC constexpr inline bool operator!=(iterator const& other) const { return not(*this == other); }

     private:
       // construct an iterator pointing to the first element to be processed by the current thread
       ALPAKA_FN_ACC inline iterator(elements_with_stride_nd const* loop, Vec first)
           : loop_{loop},
             first_{alpaka::elementwise_min(first, loop->extent_)},
             range_{alpaka::elementwise_min(first + loop->elements_, loop->extent_)},
             index_{first_} {}

       // construct an end iterator, pointing past the end of the extent
       ALPAKA_FN_ACC inline iterator(elements_with_stride_nd const* loop, at_end_t const&)
           : loop_{loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}

       // advance to the next element of the current group along dimension I;
       // when the end of the group is reached, rewind to its first element and return true (overflow)
       template <size_t I>
       ALPAKA_FN_ACC inline constexpr bool nth_elements_loop() {
         bool overflow = false;
         ++index_[I];
         if (index_[I] >= range_[I]) {
           index_[I] = first_[I];
           overflow = true;
         }
         return overflow;
       }

       // advance within the current group of elements, starting from dimension N-1 and carrying over
       // to the previous dimension on overflow; return true only if all N dimensions overflowed
       template <size_t N>
       ALPAKA_FN_ACC inline constexpr bool do_elements_loops() {
         if constexpr (N == 0) {
           // overflow
           return true;
         } else {
           if (not nth_elements_loop<N - 1>()) {
             return false;
           } else {
             return do_elements_loops<N - 1>();
           }
         }
       }

       // move to the next group of elements along dimension I, advancing by the stride;
       // when the extent is reached, rewind to the first group of the current thread and return true (overflow)
       template <size_t I>
       ALPAKA_FN_ACC inline constexpr bool nth_strided_loop() {
         bool overflow = false;
         first_[I] += loop_->stride_[I];
         if (first_[I] >= loop_->extent_[I]) {
           first_[I] = loop_->thread_[I];
           overflow = true;
         }
         index_[I] = first_[I];
         range_[I] = std::min(first_[I] + loop_->elements_[I], loop_->extent_[I]);
         return overflow;
       }

       // advance to the next group of elements, starting from dimension N-1 and carrying over
       // to the previous dimension on overflow; return true only if all N dimensions overflowed
       template <size_t N>
       ALPAKA_FN_ACC inline constexpr bool do_strided_loops() {
         if constexpr (N == 0) {
           // overflow
           return true;
         } else {
           if (not nth_strided_loop<N - 1>()) {
             return false;
           } else {
             return do_strided_loops<N - 1>();
           }
         }
       }

       // increment the iterator
       ALPAKA_FN_ACC inline constexpr void increment() {
         if constexpr (requires_single_thread_per_block_v<TAcc>) {
           // linear N-dimensional loops over the elements associated to the thread;
           // do_elements_loops<>() returns true only if all of those loops overflow
           if (not do_elements_loops<Dim::value>()) {
             // the elements loops did not overflow, return the next index
             return;
           }
         }

         // strided N-dimensional loop over the threads in the kernel launch grid;
         // do_strided_loops<>() returns true only if all of those loops overflow
         if (not do_strided_loops<Dim::value>()) {
           // the strided loops did not overflow, return the next index
           return;
         }

         // the iterator has reached or passed the end of the extent, clamp it to the extent
         first_ = loop_->extent_;
         range_ = loop_->extent_;
         index_ = loop_->extent_;
       }

       // const pointer to the elements_with_stride_nd that the iterator refers to
       const elements_with_stride_nd* loop_;

       // modified by the pre/post-increment operator
       Vec first_;  // first element of the group currently processed by this thread
       Vec range_;  // one past the last element of the group currently processed by this thread
       Vec index_;  // current element processed by this thread
     };

     ALPAKA_FN_ACC inline iterator begin() const {
       // check that all dimensions of the current thread index are within the extent
       if ((thread_ < extent_).all()) {
         // construct an iterator pointing to the first element to be processed by the current thread
         return iterator{this, thread_};
       } else {
         // construct an end iterator, pointing past the end of the extent
         return iterator{this, at_end_t{}};
       }
     }

     ALPAKA_FN_ACC inline iterator end() const {
       // construct an end iterator, pointing past the end of the extent
       return iterator{this, at_end_t{}};
     }

   private:
     const Vec elements_;  // number of elements per thread, along each dimension
     const Vec thread_;    // first element assigned to the current thread
     const Vec stride_;    // number of elements covered by one pass of the whole grid, along each dimension
     const Vec extent_;    // upper bound (exclusive) of the indices to loop over, along each dimension
   };

 }  // namespace cms::alpakatools

 #endif  // HeterogeneousCore_AlpakaInterface_interface_workdivision_h
cms::alpakatools::elements_with_stride_nd::iterator::nth_strided_loop
ALPAKA_FN_ACC constexpr bool nth_strided_loop()
Definition: workdivision.h:247

cms::alpakatools::elements_with_stride_nd::elements_with_stride_nd
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc, Vec extent)
Definition: workdivision.h:177

cms::alpakatools::elements_with_stride_nd::iterator::operator==
ALPAKA_FN_ACC constexpr bool operator==(iterator const &other) const
Definition: workdivision.h:205

cms::alpakatools::elements_with_stride::iterator::index_
Idx index_
Definition: workdivision.h:150

cms::alpakatools::elements_with_stride_nd::iterator::increment
ALPAKA_FN_ACC constexpr void increment()
Definition: workdivision.h:274

heppy_loop.loop
loop
Definition: heppy_loop.py:28

cms::alpakatools::elements_with_stride::elements_with_stride
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc)
Definition: workdivision.h:81

cms::alpakatools::make_workdiv
WorkDiv< Dim1D > make_workdiv(Idx blocks, Idx elements)
Definition: workdivision.h:46

python.cmstools.all
def all(container)
workaround iterator generators for ROOT classes
Definition: cmstools.py:25

cms::alpakatools::divide_up_by
constexpr Idx divide_up_by(Idx value, Idx divisor)
Definition: workdivision.h:19

cms::alpakatools::elements_with_stride::elements_with_stride
ALPAKA_FN_ACC elements_with_stride(TAcc const &acc, Idx extent)
Definition: workdivision.h:87

cms::alpakatools::elements_with_stride::iterator::elements_
Idx elements_
Definition: workdivision.h:145

cms::alpakatools::elements_with_stride_nd::iterator
Definition: workdivision.h:186

alpaka_common::Idx
uint32_t Idx
Definition: config.h:13

cms::alpakatools::elements_with_stride_nd
Definition: workdivision.h:166

cms::alpakatools::elements_with_stride_nd::elements_
const Vec elements_
Definition: workdivision.h:323

cms::alpakatools::elements_with_stride_nd::iterator::loop_
const elements_with_stride_nd * loop_
Definition: workdivision.h:298

cms::alpakatools::elements_with_stride::begin
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:154

cms::alpakatools::elements_with_stride::iterator::stride_
Idx stride_
Definition: workdivision.h:146

cms::alpakatools::elements_with_stride::iterator
Definition: workdivision.h:93

alpaka_common
Definition: config.h:10

cms::alpakatools::elements_with_stride_nd::iterator::operator*
ALPAKA_FN_ACC Vec operator*() const
Definition: workdivision.h:190

cms::alpakatools
Definition: PortableCollection.h:44

cms::alpakatools::elements_with_stride::iterator::first_
Idx first_
Definition: workdivision.h:149

cms::alpakatools::elements_with_stride::iterator::extent_
Idx extent_
Definition: workdivision.h:147

cms::alpakatools::elements_with_stride_nd::elements_with_stride_nd
ALPAKA_FN_ACC elements_with_stride_nd(TAcc const &acc)
Definition: workdivision.h:171

cms::alpakatools::elements_with_stride_nd::at_end_t
Definition: workdivision.h:184

gather_cfg.blocks
blocks
Definition: gather_cfg.py:90

alpaka_common::WorkDiv
alpaka::WorkDivMembers< TDim, Idx > WorkDiv
Definition: config.h:30

cms::alpakatools::elements_with_stride_nd::iterator::iterator
ALPAKA_FN_ACC iterator(elements_with_stride_nd const *loop, at_end_t const &)
Definition: workdivision.h:218

cms::alpakatools::elements_with_stride_nd::iterator::nth_elements_loop
ALPAKA_FN_ACC constexpr bool nth_elements_loop()
Definition: workdivision.h:222

cms::alpakatools::elements_with_stride::stride_
const Idx stride_
Definition: workdivision.h:161

cms::alpakatools::elements_with_stride::iterator::operator*
ALPAKA_FN_ACC Idx operator*() const
Definition: workdivision.h:105

trackingPlots.other
other
Definition: trackingPlots.py:1467

cms::alpakatools::elements_with_stride::end
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:156

cms::alpakatools::round_up_by
constexpr Idx round_up_by(Idx value, Idx divisor)
Definition: workdivision.h:16

cms::alpakatools::elements_with_stride::iterator::iterator
ALPAKA_FN_ACC iterator(Idx elements, Idx stride, Idx extent, Idx first)
Definition: workdivision.h:96

SiStripPI::min
Definition: SiStripPayloadInspectorHelper.h:178

Exhume::I
const std::complex< double > I
Definition: I.h:8

alpaka
Definition: HostOnlyTask.h:9

cms::alpakatools::elements_with_stride::extent_
const Idx extent_
Definition: workdivision.h:162

value
Definition: value.py:1

cms::alpakatools::elements_with_stride_nd::end
ALPAKA_FN_ACC iterator end() const
Definition: workdivision.h:317

cms::alpakatools::elements_with_stride_nd::iterator::operator++
ALPAKA_FN_ACC constexpr iterator operator++()
Definition: workdivision.h:193

traits.h

cms::alpakatools::elements_with_stride_nd::Vec
alpaka::Vec< Dim, Idx > Vec
Definition: workdivision.h:169

cms::alpakatools::elements_with_stride::iterator::operator++
ALPAKA_FN_ACC iterator operator++(int)
Definition: workdivision.h:131

cms::alpakatools::elements_with_stride
Definition: workdivision.h:79

N
#define N
Definition: blowfish.cc:9

cms::alpakatools::requires_single_thread_per_block
Definition: workdivision.h:23

alpaka_common::Vec
alpaka::Vec< TDim, Idx > Vec
Definition: config.h:23

cms::alpakatools::elements_with_stride_nd::extent_
const Vec extent_
Definition: workdivision.h:326

cms::alpakatools::elements_with_stride_nd::iterator::do_elements_loops
ALPAKA_FN_ACC constexpr bool do_elements_loops()
Definition: workdivision.h:233

cms::alpakatools::elements_with_stride_nd::Dim
alpaka::Dim< TAcc > Dim
Definition: workdivision.h:168

cms::alpakatools::elements_with_stride_nd::iterator::range_
Vec range_
Definition: workdivision.h:302

dqmdumpme.first
first
Definition: dqmdumpme.py:55

cms::alpakatools::elements_with_stride::iterator::operator==
ALPAKA_FN_ACC bool operator==(iterator const &other) const
Definition: workdivision.h:137

cms::alpakatools::elements_with_stride_nd::begin
ALPAKA_FN_ACC iterator begin() const
Definition: workdivision.h:306

cms::alpakatools::elements_with_stride_nd::iterator::first_
Vec first_
Definition: workdivision.h:301

cms::alpakatools::elements_with_stride::iterator::range_
Idx range_
Definition: workdivision.h:151

cms::alpakatools::elements_with_stride_nd::stride_
const Vec stride_
Definition: workdivision.h:325

cms::alpakatools::elements_with_stride::thread_
const Idx thread_
Definition: workdivision.h:160

cms::alpakatools::elements_with_stride::iterator::operator++
ALPAKA_FN_ACC iterator & operator++()
Definition: workdivision.h:108

cms::alpakatools::elements_with_stride_nd::thread_
const Vec thread_
Definition: workdivision.h:324

cms::alpakatools::elements_with_stride_nd::iterator::operator++
ALPAKA_FN_ACC constexpr iterator operator++(int)
Definition: workdivision.h:199

config.h

gpuPixelDoublets::stride
auto stride
Definition: gpuPixelDoubletsAlgos.h:170

cms::alpakatools::requires_single_thread_per_block_v
constexpr bool requires_single_thread_per_block_v
Definition: workdivision.h:42

cms::alpakatools::elements_with_stride_nd::iterator::index_
Vec index_
Definition: workdivision.h:303

cms::alpakatools::elements_with_stride_nd::iterator::iterator
ALPAKA_FN_ACC iterator(elements_with_stride_nd const *loop, Vec first)
Definition: workdivision.h:211

bookConverter.elements
elements
Definition: bookConverter.py:147

cms::alpakatools::elements_with_stride_nd::iterator::do_strided_loops
ALPAKA_FN_ACC constexpr bool do_strided_loops()
Definition: workdivision.h:260

cms::alpakatools::elements_with_stride_nd::iterator::operator!=
ALPAKA_FN_ACC constexpr bool operator!=(iterator const &other) const
Definition: workdivision.h:207

cms::alpakatools::elements_with_stride::iterator::operator!=
ALPAKA_FN_ACC bool operator!=(iterator const &other) const
Definition: workdivision.h:141

cms::alpakatools::elements_with_stride::elements_
const Idx elements_
Definition: workdivision.h:159