# device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp File Reference
```cpp
#include <iostream>
#include <memory>
#include <sstream>
#include "device.hpp"
#include "device_conv_fwd.hpp"
#include "common_header.hpp"
#include "ck/utility/env.hpp"
#include "tensor_layout.hpp"
#include "convolution_forward_specialization.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp"
#include "gridwise_gemm_xdlops_v2r3.hpp"
```

Go to the source code of this file.
## Namespaces

- namespace `ck`
- namespace `ck::tensor_operation`
- namespace `ck::tensor_operation::device`
## Functions

```cpp
template <typename GridwiseGemm,
          typename FloatAB,
          typename FloatC,
          typename AGridDesc_K0_M_K1,
          typename BGridDesc_K0_N_K1,
          typename CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2,
          typename AElementwiseOperation,
          typename BElementwiseOperation,
          typename CElementwiseOperation,
          typename Block2CTileMap,
          bool HasMainKBlockLoop>
__global__ void ck::tensor_operation::device::kernel_gemm_xdlops_v2r3_for_conv3d(
    const FloatAB *__restrict__ p_a_grid,
    const FloatAB *__restrict__ p_b_grid,
    FloatC *__restrict__ p_c_grid,
    const index_t num_batches,
    const index_t a_batch_stride,
    const index_t b_batch_stride,
    const index_t c_batch_stride,
    const AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1,
    const BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1,
    const CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2 c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2,
    const AElementwiseOperation a_element_op,
    const BElementwiseOperation b_element_op,
    const CElementwiseOperation c_element_op,
    const Block2CTileMap block_2_ctile_map)
```