#include <amd_xdlops.hpp>
|
| template<class FloatC> |
| static __device__ void | Run (const f8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f6x32_t &reg_a, const int32_t scale_a, const f6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f6x16x2_t &reg_a, const int32_t scale_a, const f6x16x2_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf6x32_t &reg_a, const int32_t scale_a, const bf6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf6x16x2_t &reg_a, const int32_t scale_a, const bf6x16x2_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f4x32_t &reg_a, const int32_t scale_a, const f4x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
◆ Run() [1/9]
◆ Run() [2/9]
◆ Run() [3/9]
◆ Run() [4/9]
◆ Run() [5/9]
◆ Run() [6/9]
◆ Run() [7/9]
◆ Run() [8/9]
◆ Run() [9/9]
The documentation for this struct was generated from the following file: