DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference
ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

#include <device_normalization_bwd_gamma_beta_impl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >:
ck::tensor_operation::device::DeviceNormalizationBwdGammaBeta< DYDataType, XDataType, MeanInvStdDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1))
using GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1}))
using GridwiseNormalizationBwdGammaBeta

Public Member Functions

template<index_t SrcVectorDim, index_t SrcVectorSize>
bool IsSrcVectorDimSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides)
template<index_t DstVectorSize>
bool IsDstVectorSizeValid (const std::vector< index_t > &lengths, const std::vector< index_t > &strides)
bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::vector< index_t > inLengths, const std::vector< index_t > dyStrides, const std::vector< index_t > xStrides, const std::vector< index_t > meanStrides, const std::vector< index_t > invStdStrides, const std::vector< index_t > outLengths, const std::vector< index_t > dgammaStrides, const std::vector< index_t > dbetaStrides, const std::vector< index_t > reduceDims, const void *p_dy, const void *p_x, const void *p_mean, const void *p_invStd, void *p_dgamma, void *p_dbeta) override
virtual std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static auto MakeSrc2dDescriptor (const std::vector< index_t > &inLengths, const std::vector< index_t > &inStrides, int numBlockTileIteration)
static auto MakeDst1dDescriptor (const std::vector< index_t > &outLengths, const std::vector< index_t > &outStrides)

Static Public Attributes

static constexpr index_t DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0
static constexpr index_t XSrcVectorDim = IsXFastestDimReduced ? 1 : 0
static constexpr index_t MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0
static constexpr index_t NumInvariantDim = Rank - NumReduceDim
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr bool reduceAllDim = (NumInvariantDim == 0)

Member Typedef Documentation

◆ GridDesc_M

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M = decltype(MakeDst1dDescriptor({1}, {1}))

◆ GridDesc_M_K

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1))

◆ GridwiseNormalizationBwdGammaBeta

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GridwiseNormalizationBwdGammaBeta
Initial value:
XDataType,
MeanInvStdDataType,
ComputeDataType,
DGammaDataType,
DBetaDataType,
BlockSize,
MThreadClusterSize,
KThreadClusterSize,
MThreadSliceSize,
KThreadSliceSize,
DYSrcVectorSize,
XSrcVectorSize,
MeanInvStdSrcVectorSize,
DGammaDstVectorSize,
DBetaDstVectorSize>
Definition gridwise_normalization_bwd_gamma_beta.hpp:37
decltype(MakeSrc2dDescriptor({1}, {1}, 1)) GridDesc_M_K
Definition device_normalization_bwd_gamma_beta_impl.hpp:194
decltype(MakeDst1dDescriptor({1}, {1})) GridDesc_M
Definition device_normalization_bwd_gamma_beta_impl.hpp:195
static constexpr index_t MeanInvStdSrcVectorDim
Definition device_normalization_bwd_gamma_beta_impl.hpp:92
static constexpr index_t DYSrcVectorDim
Definition device_normalization_bwd_gamma_beta_impl.hpp:90
static constexpr index_t XSrcVectorDim
Definition device_normalization_bwd_gamma_beta_impl.hpp:91

Member Function Documentation

◆ GetTypeString()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
std::string ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ IsDstVectorSizeValid()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t DstVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsDstVectorSizeValid ( const std::vector< index_t > & lengths,
const std::vector< index_t > & strides )
inline

◆ IsSrcVectorDimSizeValid()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
template<index_t SrcVectorDim, index_t SrcVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsSrcVectorDimSizeValid ( const std::vector< index_t > & lengths,
const std::vector< index_t > & strides )
inline

◆ IsSupportedArgument()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ MakeArgumentPointer()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeArgumentPointer ( const std::vector< index_t > inLengths,
const std::vector< index_t > dyStrides,
const std::vector< index_t > xStrides,
const std::vector< index_t > meanStrides,
const std::vector< index_t > invStdStrides,
const std::vector< index_t > outLengths,
const std::vector< index_t > dgammaStrides,
const std::vector< index_t > dbetaStrides,
const std::vector< index_t > reduceDims,
const void * p_dy,
const void * p_x,
const void * p_mean,
const void * p_invStd,
void * p_dgamma,
void * p_dbeta )
inline override virtual

◆ MakeDst1dDescriptor()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeDst1dDescriptor ( const std::vector< index_t > & outLengths,
const std::vector< index_t > & outStrides )
inline static

◆ MakeInvokerPointer()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
virtual std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeSrc2dDescriptor()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MakeSrc2dDescriptor ( const std::vector< index_t > & inLengths,
const std::vector< index_t > & inStrides,
int numBlockTileIteration )
inline static

Member Data Documentation

◆ DYSrcVectorDim

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::DYSrcVectorDim = IsDYFastestDimReduced ? 1 : 0
static constexpr

◆ K_BlockTileSize

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ MeanInvStdSrcVectorDim

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MeanInvStdSrcVectorDim = IsMeanInvStdFastestDimReduced ? 1 : 0
static constexpr

◆ NumInvariantDim

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::NumInvariantDim = Rank - NumReduceDim
static constexpr

◆ reduceAllDim

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
bool ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::reduceAllDim = (NumInvariantDim == 0)
static constexpr

◆ XSrcVectorDim

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, bool IsDYFastestDimReduced, index_t DYSrcVectorSize, bool IsXFastestDimReduced, index_t XSrcVectorSize, bool IsMeanInvStdFastestDimReduced, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::tensor_operation::device::DeviceNormalizationBwdGammaBetaImpl< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, IsDYFastestDimReduced, DYSrcVectorSize, IsXFastestDimReduced, XSrcVectorSize, IsMeanInvStdFastestDimReduced, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::XSrcVectorDim = IsXFastestDimReduced ? 1 : 0
static constexpr

The documentation for this struct was generated from the following file: