#include <moe_sorting_kernel.hpp>
|
| template<typename data_t, int wave_size> |
| __device__ void | wave_cumsum (data_t &thread_data) const |
| CK_TILE_DEVICE index_t | calc_index (index_t total_col, index_t row, index_t col) const |
| CK_TILE_DEVICE void | moe_buf_set_zero_kernel (uint8x16_t *buf, long_index_t buf_bytes) const |
| CK_TILE_DEVICE void | moe_buf_set_zero_kernel_2d (void *buf, index_t row, index_t col, index_t elem_bytes) const |
| CK_TILE_DEVICE void | moe_align_block_size_kernel (const IndexType *__restrict__ topk_id, const WeightType *__restrict__ weights, index_t *p_sorted_token_ids, WeightType *p_sorted_weights, index_t *p_sorted_expert_ids, index_t *p_total_tokens_post_pad, const index_t num_experts, const index_t tokens_per_thread, const index_t numel, const mdiv unit_size_mdiv, const mdiv topk_mdiv, void *smem) const |
| CK_TILE_DEVICE void | moe_align_block_size_kernel_ex (const IndexType *__restrict__ topk_id, const WeightType *__restrict__ weights, const IndexType *__restrict__ local_expert_mask, index_t *p_sorted_token_ids, WeightType *p_sorted_weights, index_t *p_sorted_expert_ids, index_t *p_total_tokens_post_pad, const index_t num_experts, const index_t tokens, const mdiv unit_size_mdiv, const mdiv topk_mdiv, const mdiv expert_mdiv, const index_t smem_rows, void *smem) const |
| CK_TILE_DEVICE void | operator() (Kargs kargs) const |
◆ Hargs
template<typename Problem_>
◆ IndexType
template<typename Problem_>
◆ MoeSortingKargs
template<typename Problem_>
◆ Problem
template<typename Problem_>
◆ WeightType
template<typename Problem_>
◆ BlockSize()
template<typename Problem_>
◆ calc_index()
template<typename Problem_>
◆ get_num_cu()
template<typename Problem_>
◆ GetSmemSize()
template<typename Problem_>
◆ GridSize()
template<typename Problem_>
◆ MakeKargs()
template<typename Problem_>
◆ moe_align_block_size_kernel()
template<typename Problem_>
CK_TILE_DEVICE void ck_tile::MoeSortingKernel< Problem_ >::moe_align_block_size_kernel(
    const IndexType *__restrict__ topk_id,
    const WeightType *__restrict__ weights,
    index_t *p_sorted_token_ids,
    WeightType *p_sorted_weights,
    index_t *p_sorted_expert_ids,
    index_t *p_total_tokens_post_pad,
    const index_t num_experts,
    const index_t tokens_per_thread,
    const index_t numel,
    const mdiv unit_size_mdiv,
    const mdiv topk_mdiv,
    void *smem) const
inline
◆ moe_align_block_size_kernel_ex()
template<typename Problem_>
CK_TILE_DEVICE void ck_tile::MoeSortingKernel< Problem_ >::moe_align_block_size_kernel_ex(
    const IndexType *__restrict__ topk_id,
    const WeightType *__restrict__ weights,
    const IndexType *__restrict__ local_expert_mask,
    index_t *p_sorted_token_ids,
    WeightType *p_sorted_weights,
    index_t *p_sorted_expert_ids,
    index_t *p_total_tokens_post_pad,
    const index_t num_experts,
    const index_t tokens,
    const mdiv unit_size_mdiv,
    const mdiv topk_mdiv,
    const mdiv expert_mdiv,
    const index_t smem_rows,
    void *smem) const
inline
◆ moe_buf_set_zero_kernel()
template<typename Problem_>
◆ moe_buf_set_zero_kernel_2d()
template<typename Problem_>
◆ operator()()
template<typename Problem_>
◆ wave_cumsum()
template<typename Problem_>
template<typename data_t, int wave_size>
◆ wave_reduce()
template<typename Problem_>
template<typename T, typename F, index_t wave_size_ = get_warp_size()>
◆ kBlockSize
template<typename Problem_>
◆ OCCUPANCY
template<typename Problem_>
The documentation for this struct was generated from the following file: