gemm_tile_partitioner.hpp Source File
gemm_tile_partitioner.hpp
Go to the documentation of this file.
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition tile/core/arch/amd_buffer_addressing.hpp:35
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_HOST_DEVICE constexpr auto lcm(X x, Y y)
Definition tile/core/numeric/math.hpp:314
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
STL namespace.
static constexpr index_t MPerBlock
Definition gemm_tile_partitioner.hpp:232
static CK_TILE_HOST_DEVICE auto GridSize(index_t M, index_t N) noexcept(noexcept(MPerBlock !=0 &&NPerBlock !=0)) -> index_t
Calculates GEMM kernel grid size.
Definition gemm_tile_partitioner.hpp:250
static constexpr index_t KPerBlock
Definition gemm_tile_partitioner.hpp:234
static CK_TILE_HOST_DEVICE auto GetLoopNum(index_t K) noexcept -> index_t
Calculate number of loop iterations over GEMM's K dimension.
Definition gemm_tile_partitioner.hpp:263
CK_TILE_HOST_DEVICE GemmSpatiallyLocalTilePartitioner() noexcept=delete
remove_cvref_t< BlockGemmShapeType > BlockGemmShape
Definition gemm_tile_partitioner.hpp:230
static constexpr index_t NPerBlock
Definition gemm_tile_partitioner.hpp:233
CK_TILE_DEVICE auto GetOutputTileIndex(index_t block_1d_id) noexcept -> const tuple< index_t, index_t >
Calculate workgroup 1D index mapping into 2D output C-tile space.
Definition gemm_tile_partitioner.hpp:275
CK_TILE_HOST_DEVICE GemmTile1DPartitioner() noexcept=delete
static CK_TILE_HOST_DEVICE auto GetLoopNum(index_t K) noexcept -> index_t
Calculate number of loop iterations over GEMM's K dimension.
Definition gemm_tile_partitioner.hpp:130
static CK_TILE_DEVICE auto GetOutputTileIndex(index_t blockIdx) noexcept -> const tuple< index_t, index_t >
Calculate workgroup 1D index mapping into 2D output C-tile space.
Definition gemm_tile_partitioner.hpp:142
remove_cvref_t< BlockGemmShape_ > BlockGemmShape
Definition gemm_tile_partitioner.hpp:90
static constexpr index_t MPerBlock
Definition gemm_tile_partitioner.hpp:92
static constexpr index_t NPerBlock
Definition gemm_tile_partitioner.hpp:93
static CK_TILE_HOST_DEVICE auto GridSize(index_t M, index_t N) noexcept(noexcept(MPerBlock !=0 &&NPerBlock !=0)) -> index_t
Calculates GEMM kernel grid size.
Definition gemm_tile_partitioner.hpp:117
static constexpr index_t KPerBlock
Definition gemm_tile_partitioner.hpp:94
static CK_TILE_DEVICE auto GetOutputTileIndex(index_t blockIdx, index_t blockIdy) noexcept -> const tuple< index_t, index_t >
The function returns the tile index in the 2D output tile space.
Definition gemm_tile_partitioner.hpp:74
static CK_TILE_HOST auto GridSize(index_t M, index_t N) noexcept(noexcept(MPerBlock !=0 &&NPerBlock !=0)) -> dim3
Calculates GEMM kernel grid size.
Definition gemm_tile_partitioner.hpp:41
remove_cvref_t< BlockGemmShapeType > BlockGemmShape
Definition gemm_tile_partitioner.hpp:23
static CK_TILE_HOST_DEVICE auto GetLoopNum(index_t K) noexcept -> index_t
Calculate number of loop iterations over GEMM's K dimension.
Definition gemm_tile_partitioner.hpp:54
static constexpr index_t NPerBlock
Definition gemm_tile_partitioner.hpp:26
static constexpr index_t KPerBlock
Definition gemm_tile_partitioner.hpp:27
static constexpr index_t MPerBlock
Definition gemm_tile_partitioner.hpp:25
CK_TILE_HOST_DEVICE GemmTile2DPartitioner() noexcept=delete
GemmTile1DPartitioner::GetOutputTileIndex's std::false_type specialization, checking expression validity i...
Definition gemm_tile_partitioner.hpp:161
Struct used to calculate offset tile indices.
Definition gemm_tile_partitioner.hpp:184
static CK_TILE_DEVICE auto GetOffsetedTileIndex(index_t block_start, index_t M, index_t N) noexcept -> const tuple< index_t, index_t >
The function subtracts the block's start (offset) from the 1D raw indices.
Definition gemm_tile_partitioner.hpp:192
static CK_TILE_DEVICE auto GetOffsetedTileIndex(index_t block_start, index_t M, index_t N, index_t block_idx) noexcept -> const tuple< index_t, index_t >
The function subtracts the block's start (offset) from a given block index.
Definition gemm_tile_partitioner.hpp:208
CK_TILE_HOST_DEVICE uint32_t GetTileIntersections(uint32_t tiles_, const mdiv &equiv_tiles_) const noexcept
Get location of intersection of tiles for reduction.
Definition gemm_tile_partitioner.hpp:705
CK_TILE_HOST_DEVICE uint32_t GetNumTileK() const noexcept
Definition gemm_tile_partitioner.hpp:797
uint32_t k_iters_per_big_block
Definition gemm_tile_partitioner.hpp:803
CK_TILE_HOST_DEVICE uint32_t GetSkTotalIters() const noexcept
Get total number of iterations for sk tiles.
Definition gemm_tile_partitioner.hpp:628
CK_TILE_HOST_DEVICE StreamKTilePartitioner() noexcept=delete
static constexpr uint32_t MPerBlock
Definition gemm_tile_partitioner.hpp:391
CK_TILE_HOST_DEVICE uint32_t GetNumTileM() const noexcept
Definition gemm_tile_partitioner.hpp:795
CK_TILE_DEVICE uint32_t GetAccBufferOffsetFromBlock(uint32_t block_idx_) const noexcept
Calculate offset based on block_idx index for big/little streamk blocks.
Definition gemm_tile_partitioner.hpp:773
CK_TILE_DEVICE void GetTileIdxWithOffset(uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const noexcept
Get index of tile during a specified iteration.
Definition gemm_tile_partitioner.hpp:670
uint32_t sk_num_blocks
Definition gemm_tile_partitioner.hpp:799
mdiv equiv_tiles_little
Definition gemm_tile_partitioner.hpp:807
CK_TILE_DEVICE uint32_t GetAccBufferOffsetFromTile(uint32_t tile_idx_) const noexcept
Calculate offset based on tile index for big/little tiles.
Definition gemm_tile_partitioner.hpp:745
CK_TILE_HOST_DEVICE uint32_t GetNumTileN() const noexcept
Definition gemm_tile_partitioner.hpp:796
CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSize(uint32_t acc_element_bytes) const noexcept
Calculates the total buffer space needed for accumulation and the semaphore.
Definition gemm_tile_partitioner.hpp:697
static constexpr uint32_t NPerBlock
Definition gemm_tile_partitioner.hpp:392
static constexpr uint32_t KPerBlock
Definition gemm_tile_partitioner.hpp:393
CK_TILE_HOST_DEVICE uint32_t GetTilesCoverSkBlock(uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const noexcept
Calculate the number of tiles needed for the number of sk blocks.
Definition gemm_tile_partitioner.hpp:718
static CK_TILE_HOST_DEVICE auto GetLoopNum(uint32_t K) noexcept -> uint32_t
Calculate number of loop iterations over K dimension for given work unit.
Definition gemm_tile_partitioner.hpp:563
mdiv equiv_tiles_big
Definition gemm_tile_partitioner.hpp:806
CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSizeForSemaphore() const noexcept
Calculates the buffer space needed for the semaphore.
Definition gemm_tile_partitioner.hpp:689
CK_TILE_HOST auto GridSize() const noexcept -> dim3
Calculate optimal grid size for Stream-K.
Definition gemm_tile_partitioner.hpp:550
CK_TILE_HOST_DEVICE uint32_t GetSkTiles() const noexcept
Get total number of sk tiles.
Definition gemm_tile_partitioner.hpp:638
CK_TILE_DEVICE auto GetOutputTileIndex(uint32_t tile_idx) const noexcept -> tuple< uint32_t, uint32_t >
Get output tile index for standard 2D mapping (compatibility).
Definition gemm_tile_partitioner.hpp:572
uint32_t sk_num_big_blocks
Definition gemm_tile_partitioner.hpp:800
uint32_t dp_start_block_idx
Definition gemm_tile_partitioner.hpp:801
CK_TILE_DEVICE void GetBlockItr(uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const noexcept
Get work range for a given block ID.
Definition gemm_tile_partitioner.hpp:603
mdiv k_iters_per_tile
Definition gemm_tile_partitioner.hpp:805
CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSizeForAcc(uint32_t acc_element_bytes) const noexcept
Calculates the buffer space needed for accumulation.
Definition gemm_tile_partitioner.hpp:678
CK_TILE_HOST_DEVICE uint32_t GetTotalAccBuffers() const noexcept
Calculate the amount of total accumulation buffers required for stream-k.
Definition gemm_tile_partitioner.hpp:728
BlockGemmShapeType BlockGemmShape
Definition gemm_tile_partitioner.hpp:389
CK_TILE_DEVICE uint32_t GetCurrentIterLength(uint32_t iter_start, uint32_t iter_end) const noexcept
Get length of loop iterations for stream-k loop.
Definition gemm_tile_partitioner.hpp:648
CK_TILE_DEVICE uint32_t GetTileIdx(uint32_t iter) const noexcept
Get index of tile during a specified iteration.
Definition gemm_tile_partitioner.hpp:661
uint32_t reduction_start_block_idx
Definition gemm_tile_partitioner.hpp:802
Definition magic_div.hpp:228
Definition magic_div.hpp:186
Definition tile/core/container/tuple.hpp:192