DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq > Struct Template Reference

DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq > Struct Template Reference
ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq > Struct Template Reference

#include <device_elementwise_dynamic_vector_dims_impl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >:
ck::tensor_operation::device::DeviceElementwise< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim > ck::tensor_operation::device::DeviceElementwise< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, UnaryOperation, Scale, NumDim > ck::tensor_operation::device::BaseOperator ck::tensor_operation::device::BaseOperator ck::tensor_operation::device::BaseOperator ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using InDataTypePointerTuple = decltype(GenerateInDataTypePointerTuple())
using OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())
using InGridDescTuple = decltype(GenerateInOutGridDescTuple<NumInput>())
using OutGridDescTuple = decltype(GenerateInOutGridDescTuple<NumOutput>())
using Block2TileMap = BlockToCTileMap_M00_N0_M01Adapt<M0PerBlock, M1PerBlock>
using GridwiseElementwiseOp
using GridwiseElementwiseOpSameInOutVectorDim
using InDataTypePointerTuple = decltype(GenerateInDataTypePointerTuple())
using OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())
using InGrid1dDescTuple = decltype(GenerateInOutGrid1dDescTuple(Number<NumInput>{}))
using OutGrid1dDescTuple = decltype(GenerateInOutGrid1dDescTuple(Number<NumOutput>{}))
using GridwiseElementwise

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, NumDim > lengths, const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray, const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray, const std::array< const void *, NumInput > in_dev_buffers, const std::array< void *, NumOutput > out_dev_buffers, ElementwiseOperation elementwise_op) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, NumDim > lengths, const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray, const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray, const std::array< const void *, NumInput > in_dev_buffers, const std::array< void *, NumOutput > out_dev_buffers, ElementwiseOperation elementwise_op, UnaryOperation unary_op, Scale scale_op) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static auto GenerateInDataTypePointerTuple ()
static auto GenerateOutDataTypePointerTuple ()
static index_t GetLowestStrideDim (const std::array< index_t, NumDim > &strides)
template<typename InOutDescriptor>
static auto PadInputOutputDescriptor (const InOutDescriptor &desc)
static auto GenerateBatchDimsLenghtsTuple (const std::array< index_t, NumDim > &lengths, const index_t M0_dim, const index_t M1_dim)
static auto MakeDescriptor (const std::array< index_t, NumDim > &lengths, const std::array< index_t, NumDim > &in_strides, const std::array< index_t, NumDim > &out_strides, const std::array< index_t, NumDim > &desc_strides)
template<index_t NumTensors>
static auto GenerateInOutGridDescTuple ()
static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const std::array< index_t, NumDim > lengths, const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray, const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray, const std::array< const void *, NumInput > in_dev_buffers, const std::array< void *, NumOutput > out_dev_buffers, ElementwiseOperation elementwise_op)
static auto MakeInvoker ()
static auto GenerateInDataTypePointerTuple ()
static auto GenerateOutDataTypePointerTuple ()
template<typename Desc_M>
static auto PadDescriptor_M_1d (Desc_M desc_m, index_t gridSize, index_t blockSize)
static auto MakeDescriptor_M (const std::array< index_t, NumDim > &lengths, const std::array< index_t, NumDim > &stride, index_t gridSize, index_t blockSize)
template<index_t TupleSize>
static auto GenerateInOutGrid1dDescTuple (Number< TupleSize >)
static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const std::array< index_t, NumDim > lengths, const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray, const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray, const std::array< const void *, NumInput > in_dev_buffers, const std::array< void *, NumOutput > out_dev_buffers, ElementwiseOperation elementwise_op, UnaryOperation unary_op, Scale scale_op)
static auto MakeInvoker ()

Static Public Attributes

static constexpr int NumInput = InDataTypeTuple::Size()
static constexpr int NumOutput = OutDataTypeTuple::Size()
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
Static Public Attributes inherited from ck::tensor_operation::device::DeviceElementwise< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim >
static constexpr int NumInput = InDataTypeTuple::Size()
static constexpr int NumOutput = OutDataTypeTuple::Size()
Static Public Attributes inherited from ck::tensor_operation::device::DeviceElementwise< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, UnaryOperation, Scale, NumDim >
static constexpr int NumInput
static constexpr int NumOutput

Detailed Description

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
struct ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >
Note
This structure is deprecated (left for backwards compatibility). Please use DeviceElementwiseImpl from device_elementwise_dynamic_vector_dims_impl.hpp.

Member Typedef Documentation

◆ Block2TileMap

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::Block2TileMap = BlockToCTileMap_M00_N0_M01Adapt<M0PerBlock, M1PerBlock>

◆ GridwiseElementwise

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GridwiseElementwise
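Initial value:
GridwiseElementwise_1D<InGrid1dDescTuple,
OutGrid1dDescTuple,
InDataTypePointerTuple,
OutDataTypePointerTuple,
ElementwiseOperation,
UnaryOperation,
Scale,
MPerThread,
InScalarPerVectorSeq,
OutScalarPerVectorSeq>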
Definition gridwise_elementwise_1d_scale.hpp:49
decltype(GenerateInOutGrid1dDescTuple(Number< NumInput >{})) InGrid1dDescTuple
Definition device_elementwise_scale_impl.hpp:133
decltype(GenerateInDataTypePointerTuple()) InDataTypePointerTuple
Definition device_elementwise_dynamic_vector_dims_impl.hpp:70
decltype(GenerateInOutGrid1dDescTuple(Number< NumOutput >{})) OutGrid1dDescTuple
Definition device_elementwise_scale_impl.hpp:134
decltype(GenerateOutDataTypePointerTuple()) OutDataTypePointerTuple
Definition device_elementwise_dynamic_vector_dims_impl.hpp:71

◆ GridwiseElementwiseOp

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GridwiseElementwiseOp
Initial value:
ElementwiseOperation,
BlockSize,
M0PerBlock,
M1PerBlock,
M0PerThread,
M1PerThread,
ThreadClusterArrangeOrder,
InScalarPerVectorSeq,
OutScalarPerVectorSeq,
I1,
I0>
decltype(GenerateInOutGridDescTuple< NumOutput >()) OutGridDescTuple
Definition device_elementwise_dynamic_vector_dims_impl.hpp:175
static constexpr auto I1
Definition device_elementwise_dynamic_vector_dims_impl.hpp:42
static constexpr auto I0
Definition device_elementwise_dynamic_vector_dims_impl.hpp:41
BlockToCTileMap_M00_N0_M01Adapt< M0PerBlock, M1PerBlock > Block2TileMap
Definition device_elementwise_dynamic_vector_dims_impl.hpp:177
GridwiseElementwise_1D< InGrid1dDescTuple, OutGrid1dDescTuple, InDataTypePointerTuple, OutDataTypePointerTuple, ElementwiseOperation, UnaryOperation, Scale, MPerThread, InScalarPerVectorSeq, OutScalarPerVectorSeq > GridwiseElementwise
Definition device_elementwise_scale_impl.hpp:136
decltype(GenerateInOutGridDescTuple< NumInput >()) InGridDescTuple
Definition device_elementwise_dynamic_vector_dims_impl.hpp:174

◆ GridwiseElementwiseOpSameInOutVectorDim

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GridwiseElementwiseOpSameInOutVectorDim
Initial value:
ElementwiseOperation,
BlockSize,
M0PerBlock,
M1PerBlock,
M0PerThread,
M1PerThread,
ThreadClusterArrangeOrder,
InScalarPerVectorSeq,
OutScalarPerVectorSeq,
I1,
I1>

◆ InDataTypePointerTuple [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::InDataTypePointerTuple = decltype(GenerateInDataTypePointerTuple())

◆ InDataTypePointerTuple [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::InDataTypePointerTuple = decltype(GenerateInDataTypePointerTuple())

◆ InGrid1dDescTuple

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::InGrid1dDescTuple = decltype(GenerateInOutGrid1dDescTuple(Number<NumInput>{}))

◆ InGridDescTuple

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::InGridDescTuple = decltype(GenerateInOutGridDescTuple<NumInput>())

◆ OutDataTypePointerTuple [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())

◆ OutDataTypePointerTuple [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())

◆ OutGrid1dDescTuple

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::OutGrid1dDescTuple = decltype(GenerateInOutGrid1dDescTuple(Number<NumOutput>{}))

◆ OutGridDescTuple

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
using ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::OutGridDescTuple = decltype(GenerateInOutGridDescTuple<NumOutput>())

Member Function Documentation

◆ GenerateBatchDimsLenghtsTuple()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateBatchDimsLenghtsTuple ( const std::array< index_t, NumDim > & lengths,
const index_t M0_dim,
const index_t M1_dim )
inline static

◆ GenerateInDataTypePointerTuple() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateInDataTypePointerTuple ( )
inline static

◆ GenerateInDataTypePointerTuple() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateInDataTypePointerTuple ( )
inline static

◆ GenerateInOutGrid1dDescTuple()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
template<index_t TupleSize>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateInOutGrid1dDescTuple ( Number< TupleSize > )
inline static

◆ GenerateInOutGridDescTuple()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
template<index_t NumTensors>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateInOutGridDescTuple ( )
inline static

◆ GenerateOutDataTypePointerTuple() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateOutDataTypePointerTuple ( )
inline static

◆ GenerateOutDataTypePointerTuple() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GenerateOutDataTypePointerTuple ( )
inline static

◆ GetLowestStrideDim()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
index_t ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GetLowestStrideDim ( const std::array< index_t, NumDim > & strides)
inline static

◆ GetTypeString() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::string ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GetTypeString ( ) const
inline override virtual

◆ GetTypeString() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::string ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::GetTypeString ( ) const
inline override virtual

◆ IsSupportedArgument() [1/4]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
bool ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::IsSupportedArgument ( const Argument & arg)
inline static

◆ IsSupportedArgument() [2/4]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
bool ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::IsSupportedArgument ( const Argument & arg)
inline static

◆ IsSupportedArgument() [3/4]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
bool ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ IsSupportedArgument() [4/4]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
bool ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ MakeArgument() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeArgument ( const std::array< index_t, NumDim > lengths,
const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray,
const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray,
const std::array< const void *, NumInput > in_dev_buffers,
const std::array< void *, NumOutput > out_dev_buffers,
ElementwiseOperation elementwise_op )
inline static

◆ MakeArgument() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeArgument ( const std::array< index_t, NumDim > lengths,
const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray,
const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray,
const std::array< const void *, NumInput > in_dev_buffers,
const std::array< void *, NumOutput > out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op )
inline static

◆ MakeArgumentPointer() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeArgumentPointer ( const std::array< index_t, NumDim > lengths,
const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray,
const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray,
const std::array< const void *, NumInput > in_dev_buffers,
const std::array< void *, NumOutput > out_dev_buffers,
ElementwiseOperation elementwise_op )
inline override virtual

◆ MakeArgumentPointer() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeArgumentPointer ( const std::array< index_t, NumDim > lengths,
const std::array< std::array< index_t, NumDim >, NumInput > inStridesArray,
const std::array< std::array< index_t, NumDim >, NumOutput > outStridesArray,
const std::array< const void *, NumInput > in_dev_buffers,
const std::array< void *, NumOutput > out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op )
inline override virtual

◆ MakeDescriptor()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeDescriptor ( const std::array< index_t, NumDim > & lengths,
const std::array< index_t, NumDim > & in_strides,
const std::array< index_t, NumDim > & out_strides,
const std::array< index_t, NumDim > & desc_strides )
inline static

◆ MakeDescriptor_M()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeDescriptor_M ( const std::array< index_t, NumDim > & lengths,
const std::array< index_t, NumDim > & stride,
index_t gridSize,
index_t blockSize )
inline static

◆ MakeInvoker() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeInvoker ( )
inline static

◆ MakeInvoker() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeInvoker ( )
inline static

◆ MakeInvokerPointer() [1/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeInvokerPointer ( )
inline override virtual

◆ MakeInvokerPointer() [2/2]

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::MakeInvokerPointer ( )
inline override virtual

◆ PadDescriptor_M_1d()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
template<typename Desc_M>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::PadDescriptor_M_1d ( Desc_M desc_m,
index_t gridSize,
index_t blockSize )
inline static

◆ PadInputOutputDescriptor()

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
template<typename InOutDescriptor>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::PadInputOutputDescriptor ( const InOutDescriptor & desc)
inline static

Member Data Documentation

◆ I0

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
auto ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::I1 = Number<1>{}
static constexpr

◆ NumInput

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
constexpr int ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::NumInput = InDataTypeTuple::Size()
static constexpr

◆ NumOutput

template<typename InDataTypeTuple, typename OutDataTypeTuple, typename ElementwiseOperation, index_t NumDim, index_t BlockSize, index_t M0PerBlock, index_t M1PerBlock, index_t M0PerThread, index_t M1PerThread, typename ThreadClusterArrangeOrder, typename InScalarPerVectorSeq, typename OutScalarPerVectorSeq>
constexpr int ck::tensor_operation::device::DeviceElementwiseImpl< InDataTypeTuple, OutDataTypeTuple, ElementwiseOperation, NumDim, BlockSize, M0PerBlock, M1PerBlock, M0PerThread, M1PerThread, ThreadClusterArrangeOrder, InScalarPerVectorSeq, OutScalarPerVectorSeq >::NumOutput = OutDataTypeTuple::Size()
static constexpr

The documentation for this struct was generated from the following files: