ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type > Struct Template Reference

ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type > Struct Template Reference

Composable Kernel: ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type > Struct Template Reference
ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type > Struct Template Reference

Helper structure that facilitates transfer of source (grid) data to destination threads. More...

#include <threadwise_tensor_slice_transfer.hpp>

Public Types

using Index = MultiIndex<nDim>
using SrcCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))
using SrcCoordStep = decltype(make_tensor_coordinate_step(SrcDesc{}, Index{}))

Public Member Functions

__device__ constexpr ThreadwiseTensorSliceTransfer_v2 (const SrcDesc &src_desc, const Index &src_slice_origin_idx)
__device__ void SetSrcSliceOrigin (const SrcDesc &src_desc, const Index &src_slice_origin_idx)
template<typename SrcBuffer, typename DstBuffer, typename DstSliceOriginIdx>
__device__ void Run (const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &, const DstSliceOriginIdx &, DstBuffer &dst_buf)
__device__ void MoveSrcSliceWindow (const SrcDesc &src_desc, const Index &src_slice_origin_step_idx)
template<typename SrcMoveSliceWindowStepHack>
__device__ void MoveSrcSliceWindow (const SrcDesc &src_desc, const Index &src_slice_origin_step_idx, const SrcMoveSliceWindowStepHack &src_move_slice_window_step_hack)

Static Public Member Functions

static __device__ constexpr auto GetSrcCoordinateResetStep ()

Static Public Attributes

static constexpr index_t nDim = SliceLengths::Size()
static constexpr index_t PackedSize

Detailed Description

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
struct ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >

Helper structure that facilitates transfer of source (grid) data to destination threads.

The following assumptions are made:

  • For Source (Grid) Data:
    1. The source tensor descriptor SrcDesc is not known at compile-time.
    2. The source buffer is a dynamic buffer.
    3. The source slice origin index src_slice_origin_idx is not known at compile-time.
  • For Destination (Thread) Data:
    1. The destination tensor descriptor DstDesc is known at compile-time.
    2. The destination buffer dst_buf is a static buffer.
    3. The destination slice origin index dst_slice_origin_idx is known at compile-time.
Template Parameters
SrcData: The data type of the source tensor.
DstData: The data type of the destination tensor.
SrcDesc: The descriptor type of the source tensor.
DstDesc: The descriptor type of the destination tensor.
SliceLengths: The lengths of the slice to be transferred.
DimAccessOrder: The order of dimension access for the space-filling curve.
SrcVectorDim: The dimension along which vectorized access is performed in the source tensor.
SrcScalarPerVector: The number of scalar elements per vector in the source tensor.
SrcScalarStrideInVector: Not used.
SrcResetCoordinateAfterRun: Controls whether the source coordinate is restored after each Run or rolled back one step in MoveSrcSliceWindow.
InvalidElementAsNaN: Whether to fill invalid elements with NaN (only applicable for floating-point types).

Member Typedef Documentation

◆ Index

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
using ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::Index = MultiIndex<nDim>

◆ SrcCoord

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
using ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::SrcCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))

◆ SrcCoordStep

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
using ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::SrcCoordStep = decltype(make_tensor_coordinate_step(SrcDesc{}, Index{}))

Constructor & Destructor Documentation

◆ ThreadwiseTensorSliceTransfer_v2()

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
__device__ constexpr ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::ThreadwiseTensorSliceTransfer_v2 ( const SrcDesc & src_desc,
const Index & src_slice_origin_idx )
inline constexpr

Member Function Documentation

◆ GetSrcCoordinateResetStep()

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::GetSrcCoordinateResetStep ( )
inline static constexpr

◆ MoveSrcSliceWindow() [1/2]

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::MoveSrcSliceWindow ( const SrcDesc & src_desc,
const Index & src_slice_origin_step_idx )
inline

◆ MoveSrcSliceWindow() [2/2]

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
template<typename SrcMoveSliceWindowStepHack>
__device__ void ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::MoveSrcSliceWindow ( const SrcDesc & src_desc,
const Index & src_slice_origin_step_idx,
const SrcMoveSliceWindowStepHack & src_move_slice_window_step_hack )
inline

◆ Run()

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
template<typename SrcBuffer, typename DstBuffer, typename DstSliceOriginIdx>
__device__ void ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::Run ( const SrcDesc & src_desc,
const SrcBuffer & src_buf,
const DstDesc & ,
const DstSliceOriginIdx & ,
DstBuffer & dst_buf )
inline

◆ SetSrcSliceOrigin()

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
__device__ void ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::SetSrcSliceOrigin ( const SrcDesc & src_desc,
const Index & src_slice_origin_idx )
inline

Member Data Documentation

◆ nDim

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::nDim = SliceLengths::Size()
static constexpr

◆ PackedSize

template<typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SliceLengths, typename DimAccessOrder, index_t SrcVectorDim, index_t SrcScalarPerVector, index_t SrcScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool InvalidElementAsNaN = false, typename enable_if< DstDesc::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::ThreadwiseTensorSliceTransfer_v2< SrcData, DstData, SrcDesc, DstDesc, SliceLengths, DimAccessOrder, SrcVectorDim, SrcScalarPerVector, SrcScalarStrideInVector, SrcResetCoordinateAfterRun, InvalidElementAsNaN, type >::PackedSize
static constexpr
Initial value:
= []() {
    if constexpr(is_same_v<remove_cvref_t<SrcData>, pk_i4_t>)
        return 2;
    else
        return 1;
}()
constexpr bool is_same_v
Definition type.hpp:283
Definition data_type.hpp:187

The documentation for this struct was generated from the following file: