25#include "hip_fp16_math_fwd.h"
26#include "amd_hip_vector_types.h"
31#if !defined(__HIPCC_RTC__)
36#if !__HIP_DEVICE_COMPILE__
44#if _LIBCPP_VERSION && __HIP__
47struct __numeric_type<_Float16>
49 static _Float16 __test(_Float16);
51 typedef _Float16 type;
52 static const bool value =
true;
57#pragma push_macro("__DEVICE__")
58#pragma push_macro("__RETURN_TYPE")
60#define __DEVICE__ static __device__
61#define __RETURN_TYPE bool
63#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// Parses a NUL-terminated run of octal digits into an unsigned integer.
//
// tagp    : digit string; a null pointer or an empty string yields 0.
// returns : the accumulated value (arithmetic is modulo 2^64), or 0 as soon
//           as a character outside '0'..'7' is seen.
uint64_t __make_mantissa_base8(const char* tagp)
{
    uint64_t r = 0;

    // Walk until the terminator; the pointer itself never turns null while
    // advancing, so the character must be tested, not the pointer.
    for (; tagp && *tagp; ++tagp) {
        char tmp = *tagp;

        if (tmp >= '0' && tmp <= '7') r = (r * 8u) + (uint64_t)(tmp - '0');
        else return 0;
    }

    return r;
}
// Parses a NUL-terminated run of decimal digits into an unsigned integer.
//
// tagp    : digit string; a null pointer or an empty string yields 0.
// returns : the accumulated value (arithmetic is modulo 2^64), or 0 as soon
//           as a character outside '0'..'9' is seen.
uint64_t __make_mantissa_base10(const char* tagp)
{
    uint64_t r = 0;

    // Terminate on the NUL character rather than on the pointer value.
    for (; tagp && *tagp; ++tagp) {
        char tmp = *tagp;

        if (tmp >= '0' && tmp <= '9') r = (r * 10u) + (uint64_t)(tmp - '0');
        else return 0;
    }

    return r;
}
// Parses a NUL-terminated run of hexadecimal digits (either letter case)
// into an unsigned integer. The string must NOT carry a "0x" prefix.
//
// tagp    : digit string; a null pointer or an empty string yields 0.
// returns : the accumulated value (arithmetic is modulo 2^64), or 0 as soon
//           as a character outside 0-9 / a-f / A-F is seen.
uint64_t __make_mantissa_base16(const char* tagp)
{
    uint64_t r = 0;

    // Terminate on the NUL character rather than on the pointer value.
    for (; tagp && *tagp; ++tagp) {
        char tmp = *tagp;

        if (tmp >= '0' && tmp <= '9') r = (r * 16u) + (uint64_t)(tmp - '0');
        else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + (uint64_t)(tmp - 'a' + 10);
        else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + (uint64_t)(tmp - 'A' + 10);
        else return 0;
    }

    return r;
}
119uint64_t __make_mantissa(
const char* tagp)
121 if (!tagp)
return 0u;
126 if (*tagp ==
'x' || *tagp ==
'X')
return __make_mantissa_base16(tagp);
127 else return __make_mantissa_base8(tagp);
130 return __make_mantissa_base10(tagp);
135#if __HIP_CLANG_ONLY__
138int amd_mixed_dot(
short2 a,
short2 b,
int c,
bool saturate) {
139 return __ockl_sdot2(a.data, b.data, c, saturate);
144 return __ockl_udot2(a.data, b.data, c, saturate);
148int amd_mixed_dot(
char4 a,
char4 b,
int c,
bool saturate) {
149 return __ockl_sdot4(a.data, b.data, c, saturate);
153uint amd_mixed_dot(
uchar4 a,
uchar4 b, uint c,
bool saturate) {
154 return __ockl_udot4(a.data, b.data, c, saturate);
158int amd_mixed_dot(
int a,
int b,
int c,
bool saturate) {
159 return __ockl_sdot8(a, b, c, saturate);
163uint amd_mixed_dot(uint a, uint b, uint c,
bool saturate) {
164 return __ockl_udot8(a, b, c, saturate);
168#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
172float abs(
float x) {
return __ocml_fabs_f32(x); }
175float acosf(
float x) {
return __ocml_acos_f32(x); }
178float acoshf(
float x) {
return __ocml_acosh_f32(x); }
181float asinf(
float x) {
return __ocml_asin_f32(x); }
184float asinhf(
float x) {
return __ocml_asinh_f32(x); }
187float atan2f(
float x,
float y) {
return __ocml_atan2_f32(x, y); }
190float atanf(
float x) {
return __ocml_atan_f32(x); }
193float atanhf(
float x) {
return __ocml_atanh_f32(x); }
196float cbrtf(
float x) {
return __ocml_cbrt_f32(x); }
199float ceilf(
float x) {
return __ocml_ceil_f32(x); }
202float copysignf(
float x,
float y) {
return __ocml_copysign_f32(x, y); }
205float cosf(
float x) {
return __ocml_cos_f32(x); }
208float coshf(
float x) {
return __ocml_cosh_f32(x); }
211float cospif(
float x) {
return __ocml_cospi_f32(x); }
214float cyl_bessel_i0f(
float x) {
return __ocml_i0_f32(x); }
217float cyl_bessel_i1f(
float x) {
return __ocml_i1_f32(x); }
220float erfcf(
float x) {
return __ocml_erfc_f32(x); }
223float erfcinvf(
float x) {
return __ocml_erfcinv_f32(x); }
226float erfcxf(
float x) {
return __ocml_erfcx_f32(x); }
229float erff(
float x) {
return __ocml_erf_f32(x); }
232float erfinvf(
float x) {
return __ocml_erfinv_f32(x); }
235float exp10f(
float x) {
return __ocml_exp10_f32(x); }
238float exp2f(
float x) {
return __ocml_exp2_f32(x); }
241float expf(
float x) {
return __ocml_exp_f32(x); }
244float expm1f(
float x) {
return __ocml_expm1_f32(x); }
247float fabsf(
float x) {
return __ocml_fabs_f32(x); }
250float fdimf(
float x,
float y) {
return __ocml_fdim_f32(x, y); }
// Single-precision division of x by y using the plain `/` operator.
float fdividef(float x, float y)
{
    const float quotient = x / y;
    return quotient;
}
256float floorf(
float x) {
return __ocml_floor_f32(x); }
259float fmaf(
float x,
float y,
float z) {
return __ocml_fma_f32(x, y, z); }
262float fmaxf(
float x,
float y) {
return __ocml_fmax_f32(x, y); }
265float fminf(
float x,
float y) {
return __ocml_fmin_f32(x, y); }
268float fmodf(
float x,
float y) {
return __ocml_fmod_f32(x, y); }
271float frexpf(
float x,
int* nptr)
275 __ocml_frexp_f32(x, (__attribute__((address_space(5)))
int*) &tmp);
282float hypotf(
float x,
float y) {
return __ocml_hypot_f32(x, y); }
285int ilogbf(
float x) {
return __ocml_ilogb_f32(x); }
288__RETURN_TYPE isfinite(
float x) {
return __ocml_isfinite_f32(x); }
291__RETURN_TYPE isinf(
float x) {
return __ocml_isinf_f32(x); }
294__RETURN_TYPE isnan(
float x) {
return __ocml_isnan_f32(x); }
297float j0f(
float x) {
return __ocml_j0_f32(x); }
300float j1f(
float x) {
return __ocml_j1_f32(x); }
303float jnf(
int n,
float x)
307 if (n == 0)
return j0f(x);
308 if (n == 1)
return j1f(x);
312 for (
int i = 1; i < n; ++i) {
313 float x2 = (2 * i) / x * x1 - x0;
322float ldexpf(
float x,
int e) {
return __ocml_ldexp_f32(x, e); }
325float lgammaf(
float x) {
return __ocml_lgamma_f32(x); }
328long long int llrintf(
float x) {
return __ocml_rint_f32(x); }
331long long int llroundf(
float x) {
return __ocml_round_f32(x); }
334float log10f(
float x) {
return __ocml_log10_f32(x); }
337float log1pf(
float x) {
return __ocml_log1p_f32(x); }
340float log2f(
float x) {
return __ocml_log2_f32(x); }
343float logbf(
float x) {
return __ocml_logb_f32(x); }
346float logf(
float x) {
return __ocml_log_f32(x); }
349long int lrintf(
float x) {
return __ocml_rint_f32(x); }
352long int lroundf(
float x) {
return __ocml_round_f32(x); }
355float modff(
float x,
float* iptr)
359 __ocml_modf_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
366float nanf(
const char* tagp)
371 uint32_t mantissa : 22;
373 uint32_t exponent : 8;
377 static_assert(
sizeof(float) ==
sizeof(ieee_float),
"");
381 tmp.bits.exponent = ~0u;
383 tmp.bits.mantissa = __make_mantissa(tagp);
389float nearbyintf(
float x) {
return __ocml_nearbyint_f32(x); }
392float nextafterf(
float x,
float y) {
return __ocml_nextafter_f32(x, y); }
395float norm3df(
float x,
float y,
float z) {
return __ocml_len3_f32(x, y, z); }
398float norm4df(
float x,
float y,
float z,
float w)
400 return __ocml_len4_f32(x, y, z, w);
404float normcdff(
float x) {
return __ocml_ncdf_f32(x); }
407float normcdfinvf(
float x) {
return __ocml_ncdfinv_f32(x); }
410float normf(
int dim,
const float* a)
413 while (dim--) { r += a[0] * a[0]; ++a; }
415 return __ocml_sqrt_f32(r);
419float powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
422float powif(
float base,
int iexp) {
return __ocml_pown_f32(base, iexp); }
425float rcbrtf(
float x) {
return __ocml_rcbrt_f32(x); }
428float remainderf(
float x,
float y) {
return __ocml_remainder_f32(x, y); }
431float remquof(
float x,
float y,
int* quo)
435 __ocml_remquo_f32(x, y, (__attribute__((address_space(5)))
int*) &tmp);
442float rhypotf(
float x,
float y) {
return __ocml_rhypot_f32(x, y); }
445float rintf(
float x) {
return __ocml_rint_f32(x); }
448float rnorm3df(
float x,
float y,
float z)
450 return __ocml_rlen3_f32(x, y, z);
455float rnorm4df(
float x,
float y,
float z,
float w)
457 return __ocml_rlen4_f32(x, y, z, w);
461float rnormf(
int dim,
const float* a)
464 while (dim--) { r += a[0] * a[0]; ++a; }
466 return __ocml_rsqrt_f32(r);
470float roundf(
float x) {
return __ocml_round_f32(x); }
473float rsqrtf(
float x) {
return __ocml_rsqrt_f32(x); }
476float scalblnf(
float x,
long int n)
478 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
482float scalbnf(
float x,
int n) {
return __ocml_scalbn_f32(x, n); }
485__RETURN_TYPE signbit(
float x) {
return __ocml_signbit_f32(x); }
488void sincosf(
float x,
float* sptr,
float* cptr)
493 __ocml_sincos_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
498void sincospif(
float x,
float* sptr,
float* cptr)
503 __ocml_sincospi_f32(x, (__attribute__((address_space(5)))
float*) &tmp);
508float sinf(
float x) {
return __ocml_sin_f32(x); }
511float sinhf(
float x) {
return __ocml_sinh_f32(x); }
514float sinpif(
float x) {
return __ocml_sinpi_f32(x); }
517float sqrtf(
float x) {
return __ocml_sqrt_f32(x); }
520float tanf(
float x) {
return __ocml_tan_f32(x); }
523float tanhf(
float x) {
return __ocml_tanh_f32(x); }
526float tgammaf(
float x) {
return __ocml_tgamma_f32(x); }
529float truncf(
float x) {
return __ocml_trunc_f32(x); }
532float y0f(
float x) {
return __ocml_y0_f32(x); }
535float y1f(
float x) {
return __ocml_y1_f32(x); }
538float ynf(
int n,
float x)
543 if (n == 0)
return y0f(x);
544 if (n == 1)
return y1f(x);
548 for (
int i = 1; i < n; ++i) {
549 float x2 = (2 * i) / x * x1 - x0;
560float __cosf(
float x) {
return __ocml_native_cos_f32(x); }
563float __exp10f(
float x) {
return __ocml_native_exp10_f32(x); }
566float __expf(
float x) {
return __ocml_native_exp_f32(x); }
567#if defined OCML_BASIC_ROUNDED_OPERATIONS
570float __fadd_rd(
float x,
float y) {
return __ocml_add_rtn_f32(x, y); }
574float __fadd_rn(
float x,
float y) {
return x + y; }
575#if defined OCML_BASIC_ROUNDED_OPERATIONS
578float __fadd_ru(
float x,
float y) {
return __ocml_add_rtp_f32(x, y); }
581float __fadd_rz(
float x,
float y) {
return __ocml_add_rtz_f32(x, y); }
584float __fdiv_rd(
float x,
float y) {
return __ocml_div_rtn_f32(x, y); }
588float __fdiv_rn(
float x,
float y) {
return x / y; }
589#if defined OCML_BASIC_ROUNDED_OPERATIONS
592float __fdiv_ru(
float x,
float y) {
return __ocml_div_rtp_f32(x, y); }
595float __fdiv_rz(
float x,
float y) {
return __ocml_div_rtz_f32(x, y); }
599float __fdividef(
float x,
float y) {
return x / y; }
600#if defined OCML_BASIC_ROUNDED_OPERATIONS
603float __fmaf_rd(
float x,
float y,
float z)
605 return __ocml_fma_rtn_f32(x, y, z);
610float __fmaf_rn(
float x,
float y,
float z)
612 return __ocml_fma_f32(x, y, z);
614#if defined OCML_BASIC_ROUNDED_OPERATIONS
617float __fmaf_ru(
float x,
float y,
float z)
619 return __ocml_fma_rtp_f32(x, y, z);
623float __fmaf_rz(
float x,
float y,
float z)
625 return __ocml_fma_rtz_f32(x, y, z);
629float __fmul_rd(
float x,
float y) {
return __ocml_mul_rtn_f32(x, y); }
633float __fmul_rn(
float x,
float y) {
return x * y; }
634#if defined OCML_BASIC_ROUNDED_OPERATIONS
637float __fmul_ru(
float x,
float y) {
return __ocml_mul_rtp_f32(x, y); }
640float __fmul_rz(
float x,
float y) {
return __ocml_mul_rtz_f32(x, y); }
643float __frcp_rd(
float x) {
return __builtin_amdgcn_rcpf(x); }
647float __frcp_rn(
float x) {
return __builtin_amdgcn_rcpf(x); }
648#if defined OCML_BASIC_ROUNDED_OPERATIONS
651float __frcp_ru(
float x) {
return __builtin_amdgcn_rcpf(x); }
654float __frcp_rz(
float x) {
return __builtin_amdgcn_rcpf(x); }
658float __frsqrt_rn(
float x) {
return __builtin_amdgcn_rsqf(x); }
659#if defined OCML_BASIC_ROUNDED_OPERATIONS
662float __fsqrt_rd(
float x) {
return __ocml_sqrt_rtn_f32(x); }
666float __fsqrt_rn(
float x) {
return __ocml_native_sqrt_f32(x); }
667#if defined OCML_BASIC_ROUNDED_OPERATIONS
670float __fsqrt_ru(
float x) {
return __ocml_sqrt_rtp_f32(x); }
673float __fsqrt_rz(
float x) {
return __ocml_sqrt_rtz_f32(x); }
676float __fsub_rd(
float x,
float y) {
return __ocml_sub_rtn_f32(x, y); }
680float __fsub_rn(
float x,
float y) {
return x - y; }
681#if defined OCML_BASIC_ROUNDED_OPERATIONS
684float __fsub_ru(
float x,
float y) {
return __ocml_sub_rtp_f32(x, y); }
687float __fsub_rz(
float x,
float y) {
return __ocml_sub_rtz_f32(x, y); }
691float __log10f(
float x) {
return __ocml_native_log10_f32(x); }
694float __log2f(
float x) {
return __ocml_native_log2_f32(x); }
697float __logf(
float x) {
return __ocml_native_log_f32(x); }
700float __powf(
float x,
float y) {
return __ocml_pow_f32(x, y); }
// Clamps x to the closed interval [+0.0, 1.0].
//
// returns : 0 for x <= 0 (including -0.0) and for NaN, 1 for x > 1,
//           otherwise x itself.
float __saturatef(float x)
{
    // Every ordered comparison with NaN is false, so the negated test sends
    // NaN to 0 together with zeros and negative values.
    if (!(x > 0)) return 0;

    return (x > 1) ? 1 : x;
}
706void __sincosf(
float x,
float* sptr,
float* cptr)
708 *sptr = __ocml_native_sin_f32(x);
709 *cptr = __ocml_native_cos_f32(x);
713float __sinf(
float x) {
return __ocml_native_sin_f32(x); }
716float __tanf(
float x) {
return __ocml_tan_f32(x); }
723double abs(
double x) {
return __ocml_fabs_f64(x); }
726double acos(
double x) {
return __ocml_acos_f64(x); }
729double acosh(
double x) {
return __ocml_acosh_f64(x); }
732double asin(
double x) {
return __ocml_asin_f64(x); }
735double asinh(
double x) {
return __ocml_asinh_f64(x); }
738double atan(
double x) {
return __ocml_atan_f64(x); }
741double atan2(
double x,
double y) {
return __ocml_atan2_f64(x, y); }
744double atanh(
double x) {
return __ocml_atanh_f64(x); }
747double cbrt(
double x) {
return __ocml_cbrt_f64(x); }
750double ceil(
double x) {
return __ocml_ceil_f64(x); }
753double copysign(
double x,
double y) {
return __ocml_copysign_f64(x, y); }
756double cos(
double x) {
return __ocml_cos_f64(x); }
759double cosh(
double x) {
return __ocml_cosh_f64(x); }
762double cospi(
double x) {
return __ocml_cospi_f64(x); }
765double cyl_bessel_i0(
double x) {
return __ocml_i0_f64(x); }
768double cyl_bessel_i1(
double x) {
return __ocml_i1_f64(x); }
771double erf(
double x) {
return __ocml_erf_f64(x); }
774double erfc(
double x) {
return __ocml_erfc_f64(x); }
777double erfcinv(
double x) {
return __ocml_erfcinv_f64(x); }
780double erfcx(
double x) {
return __ocml_erfcx_f64(x); }
783double erfinv(
double x) {
return __ocml_erfinv_f64(x); }
786double exp(
double x) {
return __ocml_exp_f64(x); }
789double exp10(
double x) {
return __ocml_exp10_f64(x); }
792double exp2(
double x) {
return __ocml_exp2_f64(x); }
795double expm1(
double x) {
return __ocml_expm1_f64(x); }
798double fabs(
double x) {
return __ocml_fabs_f64(x); }
801double fdim(
double x,
double y) {
return __ocml_fdim_f64(x, y); }
804double floor(
double x) {
return __ocml_floor_f64(x); }
807double fma(
double x,
double y,
double z) {
return __ocml_fma_f64(x, y, z); }
810double fmax(
double x,
double y) {
return __ocml_fmax_f64(x, y); }
813double fmin(
double x,
double y) {
return __ocml_fmin_f64(x, y); }
816double fmod(
double x,
double y) {
return __ocml_fmod_f64(x, y); }
819double frexp(
double x,
int* nptr)
823 __ocml_frexp_f64(x, (__attribute__((address_space(5)))
int*) &tmp);
830double hypot(
double x,
double y) {
return __ocml_hypot_f64(x, y); }
833int ilogb(
double x) {
return __ocml_ilogb_f64(x); }
836__RETURN_TYPE isfinite(
double x) {
return __ocml_isfinite_f64(x); }
839__RETURN_TYPE isinf(
double x) {
return __ocml_isinf_f64(x); }
842__RETURN_TYPE isnan(
double x) {
return __ocml_isnan_f64(x); }
845double j0(
double x) {
return __ocml_j0_f64(x); }
848double j1(
double x) {
return __ocml_j1_f64(x); }
851double jn(
int n,
double x)
856 if (n == 0)
return j0f(x);
857 if (n == 1)
return j1f(x);
861 for (
int i = 1; i < n; ++i) {
862 double x2 = (2 * i) / x * x1 - x0;
871double ldexp(
double x,
int e) {
return __ocml_ldexp_f64(x, e); }
874double lgamma(
double x) {
return __ocml_lgamma_f64(x); }
877long long int llrint(
double x) {
return __ocml_rint_f64(x); }
880long long int llround(
double x) {
return __ocml_round_f64(x); }
883double log(
double x) {
return __ocml_log_f64(x); }
886double log10(
double x) {
return __ocml_log10_f64(x); }
889double log1p(
double x) {
return __ocml_log1p_f64(x); }
892double log2(
double x) {
return __ocml_log2_f64(x); }
895double logb(
double x) {
return __ocml_logb_f64(x); }
898long int lrint(
double x) {
return __ocml_rint_f64(x); }
901long int lround(
double x) {
return __ocml_round_f64(x); }
904double modf(
double x,
double* iptr)
908 __ocml_modf_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
915double nan(
const char* tagp)
921 uint64_t mantissa : 51;
923 uint32_t exponent : 11;
926 static_assert(
sizeof(double) ==
sizeof(ieee_double),
"");
930 tmp.bits.exponent = ~0u;
932 tmp.bits.mantissa = __make_mantissa(tagp);
936 static_assert(
sizeof(uint64_t)==
sizeof(
double));
937 uint64_t val = __make_mantissa(tagp);
939 return *
reinterpret_cast<double*
>(&val);
944double nearbyint(
double x) {
return __ocml_nearbyint_f64(x); }
947double nextafter(
double x,
double y) {
return __ocml_nextafter_f64(x, y); }
950double norm(
int dim,
const double* a)
953 while (dim--) { r += a[0] * a[0]; ++a; }
955 return __ocml_sqrt_f64(r);
959double norm3d(
double x,
double y,
double z)
961 return __ocml_len3_f64(x, y, z);
965double norm4d(
double x,
double y,
double z,
double w)
967 return __ocml_len4_f64(x, y, z, w);
971double normcdf(
double x) {
return __ocml_ncdf_f64(x); }
974double normcdfinv(
double x) {
return __ocml_ncdfinv_f64(x); }
977double pow(
double x,
double y) {
return __ocml_pow_f64(x, y); }
980double powi(
double base,
int iexp) {
return __ocml_pown_f64(base, iexp); }
983double rcbrt(
double x) {
return __ocml_rcbrt_f64(x); }
986double remainder(
double x,
double y) {
return __ocml_remainder_f64(x, y); }
989double remquo(
double x,
double y,
int* quo)
993 __ocml_remquo_f64(x, y, (__attribute__((address_space(5)))
int*) &tmp);
1000double rhypot(
double x,
double y) {
return __ocml_rhypot_f64(x, y); }
1003double rint(
double x) {
return __ocml_rint_f64(x); }
1006double rnorm(
int dim,
const double* a)
1009 while (dim--) { r += a[0] * a[0]; ++a; }
1011 return __ocml_rsqrt_f64(r);
1015double rnorm3d(
double x,
double y,
double z)
1017 return __ocml_rlen3_f64(x, y, z);
1021double rnorm4d(
double x,
double y,
double z,
double w)
1023 return __ocml_rlen4_f64(x, y, z, w);
1027double round(
double x) {
return __ocml_round_f64(x); }
1030double rsqrt(
double x) {
return __ocml_rsqrt_f64(x); }
1033double scalbln(
double x,
long int n)
1035 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1039double scalbn(
double x,
int n) {
return __ocml_scalbn_f64(x, n); }
1042__RETURN_TYPE signbit(
double x) {
return __ocml_signbit_f64(x); }
1045double sin(
double x) {
return __ocml_sin_f64(x); }
1048void sincos(
double x,
double* sptr,
double* cptr)
1052 __ocml_sincos_f64(x, (__attribute__((address_space(5)))
double*) &tmp);
1057void sincospi(
double x,
double* sptr,
double* cptr)
1060 *sptr = __ocml_sincospi_f64(
1061 x, (__attribute__((address_space(5)))
double*) &tmp);
1066double sinh(
double x) {
return __ocml_sinh_f64(x); }
1069double sinpi(
double x) {
return __ocml_sinpi_f64(x); }
1072double sqrt(
double x) {
return __ocml_sqrt_f64(x); }
1075double tan(
double x) {
return __ocml_tan_f64(x); }
1078double tanh(
double x) {
return __ocml_tanh_f64(x); }
1081double tgamma(
double x) {
return __ocml_tgamma_f64(x); }
1084double trunc(
double x) {
return __ocml_trunc_f64(x); }
1087double y0(
double x) {
return __ocml_y0_f64(x); }
1090double y1(
double x) {
return __ocml_y1_f64(x); }
1093double yn(
int n,
double x)
1098 if (n == 0)
return j0f(x);
1099 if (n == 1)
return j1f(x);
1103 for (
int i = 1; i < n; ++i) {
1104 double x2 = (2 * i) / x * x1 - x0;
1113#if defined OCML_BASIC_ROUNDED_OPERATIONS
1116double __dadd_rd(
double x,
double y) {
return __ocml_add_rtn_f64(x, y); }
1120double __dadd_rn(
double x,
double y) {
return x + y; }
1121#if defined OCML_BASIC_ROUNDED_OPERATIONS
1124double __dadd_ru(
double x,
double y) {
return __ocml_add_rtp_f64(x, y); }
1127double __dadd_rz(
double x,
double y) {
return __ocml_add_rtz_f64(x, y); }
1130double __ddiv_rd(
double x,
double y) {
return __ocml_div_rtn_f64(x, y); }
1134double __ddiv_rn(
double x,
double y) {
return x / y; }
1135#if defined OCML_BASIC_ROUNDED_OPERATIONS
1138double __ddiv_ru(
double x,
double y) {
return __ocml_div_rtp_f64(x, y); }
1141double __ddiv_rz(
double x,
double y) {
return __ocml_div_rtz_f64(x, y); }
1144double __dmul_rd(
double x,
double y) {
return __ocml_mul_rtn_f64(x, y); }
1148double __dmul_rn(
double x,
double y) {
return x * y; }
1149#if defined OCML_BASIC_ROUNDED_OPERATIONS
1152double __dmul_ru(
double x,
double y) {
return __ocml_mul_rtp_f64(x, y); }
1155double __dmul_rz(
double x,
double y) {
return __ocml_mul_rtz_f64(x, y); }
1158double __drcp_rd(
double x) {
return __builtin_amdgcn_rcp(x); }
1162double __drcp_rn(
double x) {
return __builtin_amdgcn_rcp(x); }
1163#if defined OCML_BASIC_ROUNDED_OPERATIONS
1166double __drcp_ru(
double x) {
return __builtin_amdgcn_rcp(x); }
1169double __drcp_rz(
double x) {
return __builtin_amdgcn_rcp(x); }
1172double __dsqrt_rd(
double x) {
return __ocml_sqrt_rtn_f64(x); }
1176double __dsqrt_rn(
double x) {
return __ocml_sqrt_f64(x); }
1177#if defined OCML_BASIC_ROUNDED_OPERATIONS
1180double __dsqrt_ru(
double x) {
return __ocml_sqrt_rtp_f64(x); }
1183double __dsqrt_rz(
double x) {
return __ocml_sqrt_rtz_f64(x); }
1186double __dsub_rd(
double x,
double y) {
return __ocml_sub_rtn_f64(x, y); }
1190double __dsub_rn(
double x,
double y) {
return x - y; }
1191#if defined OCML_BASIC_ROUNDED_OPERATIONS
1194double __dsub_ru(
double x,
double y) {
return __ocml_sub_rtp_f64(x, y); }
1197double __dsub_rz(
double x,
double y) {
return __ocml_sub_rtz_f64(x, y); }
1200double __fma_rd(
double x,
double y,
double z)
1202 return __ocml_fma_rtn_f64(x, y, z);
1207double __fma_rn(
double x,
double y,
double z)
1209 return __ocml_fma_f64(x, y, z);
1211#if defined OCML_BASIC_ROUNDED_OPERATIONS
1214double __fma_ru(
double x,
double y,
double z)
1216 return __ocml_fma_rtp_f64(x, y, z);
1220double __fma_rz(
double x,
double y,
double z)
1222 return __ocml_fma_rtz_f64(x, y, z);
1233 int sgn = x >> (
sizeof(int) * CHAR_BIT - 1);
1234 return (x ^ sgn) - sgn;
1240 long sgn = x >> (
sizeof(long) * CHAR_BIT - 1);
1241 return (x ^ sgn) - sgn;
// Branch-free absolute value of a long long.
// The sign is spread into a mask (all ones for negative x, zero otherwise);
// xor-ing with the mask and subtracting it negates x exactly when the mask
// is set.
long long llabs(long long x)
{
    enum { kSignShift = sizeof(long long) * CHAR_BIT - 1 };

    const long long mask = x >> kSignShift;
    const long long flipped = x ^ mask;

    return flipped - mask;
}
1251#if defined(__cplusplus)
1254 long abs(
long x) {
return labs(x); }
1257 long long abs(
long long x) {
return llabs(x); }
1262inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1263 return __ocml_fma_f16(x, y, z);
1267inline float fma(
float x,
float y,
float z) {
1268 return fmaf(x, y, z);
1271#pragma push_macro("__DEF_FLOAT_FUN")
1272#pragma push_macro("__DEF_FLOAT_FUN2")
1273#pragma push_macro("__DEF_FLOAT_FUN2I")
1274#pragma push_macro("__HIP_OVERLOAD")
1275#pragma push_macro("__HIP_OVERLOAD2")
1278template<
bool __B,
class __T =
void>
1288#define __HIP_OVERLOAD1(__retty, __fn) \
1289 template <typename __T> \
1291 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1294 return ::__fn((double)__x); \
1300#define __HIP_OVERLOAD2(__retty, __fn) \
1301 template <typename __T1, typename __T2> \
1302 __DEVICE__ typename __hip_enable_if< \
1303 std::numeric_limits<__T1>::is_specialized && \
1304 std::numeric_limits<__T2>::is_specialized, \
1306 __fn(__T1 __x, __T2 __y) { \
1307 return __fn((double)__x, (double)__y); \
1311#define __DEF_FUN1(retty, func) \
1314float func(float x) \
1316 return func##f(x); \
1318__HIP_OVERLOAD1(retty, func)
1321#define __DEF_FUNI(retty, func) \
1324retty func(float x) \
1326 return func##f(x); \
1328__HIP_OVERLOAD1(retty, func)
1331#define __DEF_FUN2(retty, func) \
1334float func(float x, float y) \
1336 return func##f(x, y); \
1338__HIP_OVERLOAD2(retty, func)
1340__DEF_FUN1(
double, acos)
1341__DEF_FUN1(
double, acosh)
1342__DEF_FUN1(
double, asin)
1343__DEF_FUN1(
double, asinh)
1344__DEF_FUN1(
double, atan)
1345__DEF_FUN2(
double, atan2);
1346__DEF_FUN1(
double, atanh)
1347__DEF_FUN1(
double, cbrt)
1348__DEF_FUN1(
double, ceil)
1349__DEF_FUN2(
double, copysign);
1350__DEF_FUN1(
double, cos)
1351__DEF_FUN1(
double, cosh)
1352__DEF_FUN1(
double, erf)
1353__DEF_FUN1(
double, erfc)
1354__DEF_FUN1(
double, exp)
1355__DEF_FUN1(
double, exp2)
1356__DEF_FUN1(
double, expm1)
1357__DEF_FUN1(
double, fabs)
1358__DEF_FUN2(
double, fdim);
1359__DEF_FUN1(
double, floor)
1360__DEF_FUN2(
double, fmax);
1361__DEF_FUN2(
double, fmin);
1362__DEF_FUN2(
double, fmod);
1364__DEF_FUN2(
double, hypot);
1365__DEF_FUNI(
int, ilogb)
1366__HIP_OVERLOAD1(
bool, isfinite)
1367__HIP_OVERLOAD2(
bool, isgreater);
1368__HIP_OVERLOAD2(
bool, isgreaterequal);
1369__HIP_OVERLOAD1(
bool, isinf);
1370__HIP_OVERLOAD2(
bool, isless);
1371__HIP_OVERLOAD2(
bool, islessequal);
1372__HIP_OVERLOAD2(
bool, islessgreater);
1373__HIP_OVERLOAD1(
bool, isnan);
1375__HIP_OVERLOAD2(
bool, isunordered);
1376__DEF_FUN1(
double, lgamma)
1377__DEF_FUN1(
double, log)
1378__DEF_FUN1(
double, log10)
1379__DEF_FUN1(
double, log1p)
1380__DEF_FUN1(
double, log2)
1381__DEF_FUN1(
double, logb)
1382__DEF_FUNI(
long long, llrint)
1383__DEF_FUNI(
long long, llround)
1384__DEF_FUNI(
long, lrint)
1385__DEF_FUNI(
long, lround)
1386__DEF_FUN1(
double, nearbyint);
1387__DEF_FUN2(
double, nextafter);
1388__DEF_FUN2(
double, pow);
1389__DEF_FUN2(
double, remainder);
1390__DEF_FUN1(
double, rint);
1391__DEF_FUN1(
double, round);
1392__HIP_OVERLOAD1(
bool, signbit)
1393__DEF_FUN1(
double, sin)
1394__DEF_FUN1(
double, sinh)
1395__DEF_FUN1(
double, sqrt)
1396__DEF_FUN1(
double, tan)
1397__DEF_FUN1(
double, tanh)
1398__DEF_FUN1(
double, tgamma)
1399__DEF_FUN1(
double, trunc);
1402#define __DEF_FLOAT_FUN2I(func) \
1405float func(float x, int y) \
1407 return func##f(x, y); \
1409__DEF_FLOAT_FUN2I(scalbn)
1410__DEF_FLOAT_FUN2I(ldexp)
1413__DEVICE__
inline T min(T arg1, T arg2) {
1414 return (arg1 < arg2) ? arg1 : arg2;
1418__DEVICE__
inline T max(T arg1, T arg2) {
1419 return (arg1 > arg2) ? arg1 : arg2;
1422__DEVICE__
inline int min(
int arg1,
int arg2) {
1423 return (arg1 < arg2) ? arg1 : arg2;
1425__DEVICE__
inline int max(
int arg1,
int arg2) {
1426 return (arg1 > arg2) ? arg1 : arg2;
1429__DEVICE__
inline int min(uint32_t arg1,
int arg2) {
1430 return (arg1 < arg2) ? arg1 : arg2;
1432__DEVICE__
inline int max(uint32_t arg1,
int arg2) {
1433 return (arg1 > arg2) ? arg1 : arg2;
1438float max(
float x,
float y) {
1444double max(
double x,
double y) {
1450float min(
float x,
float y) {
1456double min(
double x,
double y) {
1460__HIP_OVERLOAD2(
double, max)
1461__HIP_OVERLOAD2(
double, min)
1463#if !defined(__HIPCC_RTC__)
1464__host__ inline static int min(
int arg1,
int arg2) {
1465 return std::min(arg1, arg2);
1468__host__ inline static int max(
int arg1,
int arg2) {
1469 return std::max(arg1, arg2);
1474inline float pow(
float base,
int iexp) {
1475 return powif(base, iexp);
1479inline double pow(
double base,
int iexp) {
1480 return powi(base, iexp);
1484inline _Float16 pow(_Float16 base,
int iexp) {
1485 return __ocml_pown_f16(base, iexp);
1488#pragma pop_macro("__DEF_FLOAT_FUN")
1489#pragma pop_macro("__DEF_FLOAT_FUN2")
1490#pragma pop_macro("__DEF_FLOAT_FUN2I")
1491#pragma pop_macro("__HIP_OVERLOAD")
1492#pragma pop_macro("__HIP_OVERLOAD2")
1496#pragma pop_macro("__DEVICE__")
1497#pragma pop_macro("__RETURN_TYPE")
1502#include <hip/amd_detail/amd_hip_runtime.h>
#define __host__
Definition host_defines.h:170
Definition amd_hip_vector_types.h:1623
Definition amd_hip_vector_types.h:1660
Definition amd_hip_vector_types.h:1690
Definition amd_hip_vector_types.h:1727
Definition amd_math_functions.h:1279