magic_div.hpp Source File

magic_div.hpp Source File#

Composable Kernel: magic_div.hpp Source File
magic_div.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
11#include <stdint.h>
12
13namespace ck_tile {
14
15// magic number division
16// Caution:
17// 1. For uint32_t as dividend: magic number division implementation being used would produce
18// correct result if the dividend is uint32_t and its value is within 31-bit value range.
19// 2. For int32_t as dividendd: magic number division for int32_t dividened has not been
20// implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
21// division implementation for uint32_t is then used. Therefore, dividend value need to be
22// non-negative.
23// TODO:
24// 1. Implement magic number divison for int32_t
25// 2. Implement magic number divison for unit32_t with 32-bit value range
27{
28 // uint32_t
30 {
31 // WARNING: magic division is only valid for division inside this range.
32 // assert(divisor >= 1 && divisor <= INT32_MAX)
33
34 uint32_t shift_u32 = 0;
35
36 while((1U << shift_u32) < divisor)
37 {
38 shift_u32++;
39 };
40
41 uint64_t tmp_u64 = static_cast<uint64_t>((1UL << shift_u32) - divisor) << 32;
42 uint32_t multiplier_u32 = tmp_u64 / divisor + 1;
43
44 return make_tuple(multiplier_u32, shift_u32);
45 }
46
47 template <auto Divisor, typename = std::enable_if_t<(0 < Divisor)>>
48 CK_TILE_HOST_DEVICE static constexpr auto calculate_magic_numbers(constant<Divisor>)
49 {
50 constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
51
52 constexpr uint32_t multiplier = tmp[number<0>{}];
53 constexpr uint32_t shift = tmp[number<1>{}];
54
55 return make_tuple(constant<multiplier>{}, constant<shift>{});
56 }
57
58 // magic division for uint32_t
59 CK_TILE_DEVICE static constexpr uint32_t
60 do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
61 {
62 if(__builtin_is_constant_evaluated())
63 {
64 uint32_t tmp = (static_cast<uint64_t>(dividend) * multiplier) >> 32;
65 return (tmp + dividend) >> shift;
66 }
67 else
68 {
69 uint32_t tmp = __umulhi(dividend, multiplier);
70 return (tmp + dividend) >> shift;
71 }
72 }
73
74 CK_TILE_HOST static constexpr uint32_t
75 do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
76 {
77 uint32_t tmp = (static_cast<uint64_t>(dividend) * multiplier) >> 32;
78 return (tmp + dividend) >> shift;
79 }
80
81 // magic division for int32_t
82 // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
83 // non-negative for result to be correct
84 // TODO: figure out how to do magic number divison for int32_t as dividended
85 CK_TILE_DEVICE static constexpr int32_t
86 do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
87 {
88 if(__builtin_is_constant_evaluated())
89 {
90 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
91 uint32_t tmp = (static_cast<uint64_t>(dividend_u32) * multiplier) >> 32;
92 return (tmp + dividend_u32) >> shift;
93 }
94 else
95 {
96 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
97 uint32_t tmp = __umulhi(dividend_u32, multiplier);
98 return (tmp + dividend_u32) >> shift;
99 }
100 }
101
102 CK_TILE_HOST static constexpr int32_t
103 do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
104 {
105 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
106 uint32_t tmp = (static_cast<uint64_t>(dividend_u32) * multiplier) >> 32;
107 return (tmp + dividend_u32) >> shift;
108 }
109};
110
111// magic number division
112// This version on works for divisor and dividended between [0, 1 << 16]
114{
115 // uint32_t
117 {
118 // WARNING: magic division is only valid for division inside this range.
119 // assert(divisor >= 1 && divisor <= (1U << 16));
120
121 uint32_t shift_u32 = 0;
122
123 while((1U << shift_u32) < divisor)
124 {
125 shift_u32++;
126 };
127
128 uint32_t one = 1;
129 uint32_t multiplier_u32 = ((one << 16) * ((one << shift_u32) - divisor)) / divisor + 1;
130
131 return make_tuple(multiplier_u32, shift_u32);
132 }
133
134 // integral_constant<uint32_t, .>
135 template <auto Divisor>
137 {
138 constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
139
140 constexpr uint32_t multiplier = tmp[number<0>{}];
141 constexpr uint32_t shift = tmp[number<1>{}];
142
144 }
145
146 // magic division for uint32_t
147 CK_TILE_DEVICE static constexpr uint32_t
148 do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
149 {
150 uint32_t tmp = (dividend * multiplier) >> 16;
151 return (tmp + dividend) >> shift;
152 }
153
154 CK_TILE_HOST static constexpr uint32_t
155 do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
156 {
157 uint32_t tmp = (dividend * multiplier) >> 16;
158 return (tmp + dividend) >> shift;
159 }
160
161 // magic division for int32_t
162 // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
163 // non-negative for result to be correct
164 // TODO: figure out how to do magic number divison for int32_t as dividended
165 CK_TILE_DEVICE static constexpr int32_t
166 do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
167 {
168 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
169 uint32_t tmp = (dividend_u32 * multiplier) >> 16;
170 return (tmp + dividend_u32) >> shift;
171 }
172
173 CK_TILE_HOST static constexpr int32_t
174 do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
175 {
176 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
177 uint32_t tmp = (dividend_u32 * multiplier) >> 16;
178 return (tmp + dividend_u32) >> shift;
179 }
180};
181
182// use 32bit version
184
185struct mdiv
186{
187 // 1 dword -> 3 dword storage
190 uint32_t shift; // TODO: 8 bit is enough
191
192 // prefer construct on host
194 {
195 auto tmp = magic_division::calculate_magic_numbers(divisor_);
196
197 multiplier = tmp[number<0>{}];
198 shift = tmp[number<1>{}];
199 }
200
202
204 {
205 divisor = divisor_;
206 auto tmp = magic_division::calculate_magic_numbers(divisor_);
207
208 multiplier = tmp[number<0>{}];
209 shift = tmp[number<1>{}];
210 }
211
213 {
215 }
216
218 divmod(uint32_t dividend_, uint32_t& quotient_, uint32_t& remainder_) const
219 {
220 quotient_ = div(dividend_);
221 remainder_ = dividend_ - (quotient_ * divisor);
222 }
223
225};
226
227struct mdiv2
228{
229 // 1 dword -> 2 dword storage, divisor need compute from runtime
231 uint32_t shift; // TODO: 8 bit is enough
232
233 // prefer construct on host
235 {
236 auto tmp = magic_division::calculate_magic_numbers(divisor_);
237
238 multiplier = tmp[number<0>{}];
239 shift = tmp[number<1>{}];
240 }
241
243
245 {
247 }
248
250 divmod(uint32_t dividend_, uint32_t divisor_, uint32_t& quotient_, uint32_t& remainder_) const
251 {
252 quotient_ = div(dividend_);
253 remainder_ = dividend_ - (quotient_ * divisor_);
254 }
255};
256
257} // namespace ck_tile
#define CK_TILE_DEVICE
Definition config.hpp:41
#define CK_TILE_HOST
Definition config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition config.hpp:42
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_HOST_DEVICE constexpr Y bit_cast(const X &x)
Definition bit_cast.hpp:11
constant< v > number
Definition tile/core/numeric/integral_constant.hpp:37
int32_t int32_t
Definition integer.hpp:10
magic_division32_bit_range magic_division
Definition magic_div.hpp:183
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
unsigned int uint32_t
Definition stdint.h:126
unsigned __int64 uint64_t
Definition stdint.h:136
Definition tile/core/numeric/integral_constant.hpp:13
Definition magic_div.hpp:114
static CK_TILE_HOST_DEVICE constexpr auto calculate_magic_numbers(constant< Divisor >)
Definition magic_div.hpp:136
static CK_TILE_HOST constexpr int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:174
static CK_TILE_DEVICE constexpr int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:166
static CK_TILE_HOST constexpr uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:155
static CK_TILE_HOST_DEVICE constexpr auto calculate_magic_numbers(uint32_t divisor)
Definition magic_div.hpp:116
static CK_TILE_DEVICE constexpr uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:148
Definition magic_div.hpp:27
static CK_TILE_DEVICE constexpr uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:60
static CK_TILE_HOST constexpr int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:103
static CK_TILE_DEVICE constexpr int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:86
static CK_TILE_HOST constexpr uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition magic_div.hpp:75
static CK_TILE_HOST_DEVICE constexpr auto calculate_magic_numbers(uint32_t divisor)
Definition magic_div.hpp:29
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t &quotient_, uint32_t &remainder_) const
Definition magic_div.hpp:250
CK_TILE_HOST_DEVICE mdiv2(uint32_t divisor_)
Definition magic_div.hpp:234
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition magic_div.hpp:244
uint32_t multiplier
Definition magic_div.hpp:230
CK_TILE_HOST_DEVICE mdiv2()
Definition magic_div.hpp:242
uint32_t shift
Definition magic_div.hpp:231
CK_TILE_HOST_DEVICE mdiv(uint32_t divisor_)
Definition magic_div.hpp:193
CK_TILE_HOST_DEVICE uint32_t get() const
Definition magic_div.hpp:224
CK_TILE_HOST_DEVICE mdiv()
Definition magic_div.hpp:201
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t &quotient_, uint32_t &remainder_) const
Definition magic_div.hpp:218
uint32_t divisor
Definition magic_div.hpp:188
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition magic_div.hpp:212
uint32_t shift
Definition magic_div.hpp:190
uint32_t multiplier
Definition magic_div.hpp:189
CK_TILE_HOST_DEVICE void update(uint32_t divisor_)
Definition magic_div.hpp:203