gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp Source File
gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp
Go to the documentation of this file.
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_DEVICE void load_int4_tile(WarpTile &dst, const WarpWindow &src)
Definition load_interleaved_pk_type.hpp:46
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool, TailNumber tail_number)
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:35
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:20
remove_cvref_t< typename Problem::CLayout > CLayout
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:33
remove_cvref_t< typename Problem::QuantGroupSize > QuantGroupSize
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:28
static constexpr bool PreshuffleB
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:97
static constexpr index_t KPerBlockBQ
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:72
remove_cvref_t< typename Problem::ComputeDataType > ComputeDataType
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:26
static constexpr index_t GetVectorSizeBQ()
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:77
static constexpr index_t NWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:135
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:41
static constexpr bool kPadN
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:118
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const BQDramBlockWindowTmp &bq_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:106
static constexpr bool kPadK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:119
static constexpr index_t MIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:137
WeightPreshufflePipelineAGmemBGmemCRegV2< Problem > Base
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:21
static constexpr index_t m_preload
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:148
static constexpr index_t NIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:138
static constexpr bool kPadM
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:117
remove_cvref_t< typename Problem::ALayout > ALayout
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:30
remove_cvref_t< typename Problem::ADataType > ADataType
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:22
static CK_TILE_HOST const std::string GetName()
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:83
static constexpr auto TailNum
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:98
static constexpr index_t KIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:139
remove_cvref_t< typename Problem::BQLayout > BQLayout
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:32
remove_cvref_t< typename Problem::BLayout > BLayout
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:31
remove_cvref_t< typename Problem::BDataType > BDataType
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:23
static constexpr index_t flatKPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:98
static constexpr index_t KIterPerQScale
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:81
static constexpr auto I0
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:124
remove_cvref_t< typename Problem::BQDataType > BQDataType
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:24
remove_cvref_t< typename Problem::CDataType > CDataType
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:25
static constexpr index_t kNPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:90
static constexpr index_t flatNPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:99
static constexpr index_t BlockSize
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:86
static constexpr index_t KPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:145
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const BQDramBlockWindowTmp &bq_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:455
static constexpr index_t kKPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:91
static constexpr index_t MPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:144
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:27
static constexpr auto config
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:38
static constexpr index_t kMPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:89
remove_cvref_t< decltype(PipelinePolicy::template GetBlockWeightPreshuffleBQuant< Problem >())> BlockWeightPreshuffle
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:35
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const BQDramBlockWindowTmp &bq_dram_block_window_tmp, index_t num_loop, TailNumber tail_number, void *p_smem_ping, void *p_smem_pong) const
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:476
static constexpr index_t QScalesPerBlockRow
Definition gemm_wp_bquant_pipeline_ag_bg_cr_v2.hpp:74
static constexpr auto I1
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:125
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:54
static constexpr index_t GetVectorSizeB()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:107
static constexpr index_t GetVectorSizeA()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:102
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:295
static constexpr index_t NWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:135
static constexpr bool kPadN
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:118
static constexpr index_t MWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:134
static constexpr bool kPadK
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:119
static constexpr index_t MIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:137
static constexpr index_t m_preload
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:148
static constexpr index_t NIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:138
static constexpr bool kPadM
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:117
static constexpr index_t KIterPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:139
static constexpr index_t flatKPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:98
static constexpr auto I0
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:124
static constexpr index_t kNPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:90
static constexpr index_t flatNPerWarp
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:99
static constexpr index_t BlockSize
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:86
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:490
static constexpr auto I2
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:126
static constexpr index_t KPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:145
static constexpr index_t kKPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:91
static constexpr index_t MPerBlockPerIter
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:144
static constexpr index_t kMPerBlock
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:89
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:436
static constexpr auto I1
Definition wp_pipeline_agmem_bgmem_creg_v2.hpp:125
Definition tile/core/utility/functional.hpp:43