|
// GCC 32/64-bit integer arithmetic support for 32-bit systems that can't link
// to libgcc.

// Function prototypes and descriptions are taken from
// https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html.

// This file may be #include'd by another file, so we try not to pollute the
// namespace and we don't import any headers.

// All functions must be resolvable by the linker and therefore can't be inline
// or static, even if they're #included into the file where they'll be used.

// For best performance we try to avoid branching. This makes the code a little
// weird in places.

// See https://github.com/glitchub/arith64 for more information.
// This software is released as-is into the public domain, as described at
// https://unlicense.org. Do whatever you like with it.
| 19 | + |
// Integer types used throughout this file. The names assume that
// "long long" is 64 bits wide and "int" is 32 bits wide.
#define arith64_u64 unsigned long long int
#define arith64_s64 signed long long int
#define arith64_u32 unsigned int
#define arith64_s32 int

// The half-word layout below is chosen from the compiler's predefined
// byte-order macros. In an #if, an undefined macro evaluates to 0, so without
// this guard a compiler that lacks them would compare 0 == 0 and silently get
// the big-endian layout. Fail loudly instead.
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_BIG_ENDIAN__)
#error "arith64: __BYTE_ORDER__ and __ORDER_BIG_ENDIAN__ must be predefined by the compiler"
#endif

// A 64-bit value that can also be viewed as two 32-bit halves, either
// unsigned (u32) or signed (s32). The member order inside each struct follows
// the target byte order so that "hi" always names the most significant half.
typedef union
{
    arith64_u64 u64;
    arith64_s64 s64;
    struct
    {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        arith64_u32 hi;
        arith64_u32 lo;
#else
        arith64_u32 lo;
        arith64_u32 hi;
#endif
    } u32;
    struct
    {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        arith64_s32 hi;
        arith64_s32 lo;
#else
        arith64_s32 lo;
        arith64_s32 hi;
#endif
    } s32;
} arith64_word;
| 46 | + |
// Pull the most / least significant 32-bit half out of a 64-bit value by
// viewing it through an arith64_word compound literal.
#define arith64_hi(n) ((arith64_word){.u64 = (n)}.u32.hi)
#define arith64_lo(n) ((arith64_word){.u64 = (n)}.u32.lo)

// Conditional negate: yields -a when b is negative and a otherwise.
// The sign test on b gives -1 or 0; XORing that into a inverts every bit (or
// none), and adding the 1-or-0 sign test completes the two's-complement
// negation. Note that b is evaluated twice.
#define arith64_neg(a, b) (((a) ^ -(((arith64_s64)(b)) < 0)) + (((arith64_s64)(b)) < 0))

// Absolute value, expressed as "negate a if a itself is negative".
#define arith64_abs(a) arith64_neg(a, a)
| 54 | + |
| 55 | +// Return the absolute value of a. |
| 56 | +// Note LLINT_MIN cannot be negated. |
| 57 | +arith64_s64 __absvdi2(arith64_s64 a) |
| 58 | +{ |
| 59 | + return arith64_abs(a); |
| 60 | +} |
| 61 | + |
| 62 | +// Return the result of shifting a left by b bits. |
| 63 | +arith64_s64 __ashldi3(arith64_s64 a, int b) |
| 64 | +{ |
| 65 | + arith64_word w = {.s64 = a}; |
| 66 | + |
| 67 | + b &= 63; |
| 68 | + |
| 69 | + if (b >= 32) |
| 70 | + { |
| 71 | + w.u32.hi = w.u32.lo << (b - 32); |
| 72 | + w.u32.lo = 0; |
| 73 | + } else if (b) |
| 74 | + { |
| 75 | + w.u32.hi = (w.u32.lo >> (32 - b)) | (w.u32.hi << b); |
| 76 | + w.u32.lo <<= b; |
| 77 | + } |
| 78 | + return w.s64; |
| 79 | +} |
| 80 | + |
| 81 | +// Return the result of arithmetically shifting a right by b bits. |
| 82 | +arith64_s64 __ashrdi3(arith64_s64 a, int b) |
| 83 | +{ |
| 84 | + arith64_word w = {.s64 = a}; |
| 85 | + |
| 86 | + b &= 63; |
| 87 | + |
| 88 | + if (b >= 32) |
| 89 | + { |
| 90 | + w.s32.lo = w.s32.hi >> (b - 32); |
| 91 | + w.s32.hi >>= 31; // 0xFFFFFFFF or 0 |
| 92 | + } else if (b) |
| 93 | + { |
| 94 | + w.u32.lo = (w.u32.hi << (32 - b)) | (w.u32.lo >> b); |
| 95 | + w.s32.hi >>= b; |
| 96 | + } |
| 97 | + return w.s64; |
| 98 | +} |
| 99 | + |
| 100 | +// These functions return the number of leading 0-bits in a, starting at the |
| 101 | +// most significant bit position. If a is zero, the result is undefined. |
| 102 | +int __clzsi2(arith64_u32 a) |
| 103 | +{ |
| 104 | + int b, n = 0; |
| 105 | + b = !(a & 0xffff0000) << 4; n += b; a <<= b; |
| 106 | + b = !(a & 0xff000000) << 3; n += b; a <<= b; |
| 107 | + b = !(a & 0xf0000000) << 2; n += b; a <<= b; |
| 108 | + b = !(a & 0xc0000000) << 1; n += b; a <<= b; |
| 109 | + return n + !(a & 0x80000000); |
| 110 | +} |
| 111 | + |
| 112 | +int __clzdi2(arith64_u64 a) |
| 113 | +{ |
| 114 | + int b, n = 0; |
| 115 | + b = !(a & 0xffffffff00000000ULL) << 5; n += b; a <<= b; |
| 116 | + b = !(a & 0xffff000000000000ULL) << 4; n += b; a <<= b; |
| 117 | + b = !(a & 0xff00000000000000ULL) << 3; n += b; a <<= b; |
| 118 | + b = !(a & 0xf000000000000000ULL) << 2; n += b; a <<= b; |
| 119 | + b = !(a & 0xc000000000000000ULL) << 1; n += b; a <<= b; |
| 120 | + return n + !(a & 0x8000000000000000ULL); |
| 121 | +} |
| 122 | + |
| 123 | +// These functions return the number of trailing 0-bits in a, starting at the |
| 124 | +// least significant bit position. If a is zero, the result is undefined. |
| 125 | +int __ctzsi2(arith64_u32 a) |
| 126 | +{ |
| 127 | + int b, n = 0; |
| 128 | + b = !(a & 0x0000ffff) << 4; n += b; a >>= b; |
| 129 | + b = !(a & 0x000000ff) << 3; n += b; a >>= b; |
| 130 | + b = !(a & 0x0000000f) << 2; n += b; a >>= b; |
| 131 | + b = !(a & 0x00000003) << 1; n += b; a >>= b; |
| 132 | + return n + !(a & 0x00000001); |
| 133 | +} |
| 134 | + |
| 135 | +int __ctzdi2(arith64_u64 a) |
| 136 | +{ |
| 137 | + int b, n = 0; |
| 138 | + b = !(a & 0x00000000ffffffffULL) << 5; n += b; a >>= b; |
| 139 | + b = !(a & 0x000000000000ffffULL) << 4; n += b; a >>= b; |
| 140 | + b = !(a & 0x00000000000000ffULL) << 3; n += b; a >>= b; |
| 141 | + b = !(a & 0x000000000000000fULL) << 2; n += b; a >>= b; |
| 142 | + b = !(a & 0x0000000000000003ULL) << 1; n += b; a >>= b; |
| 143 | + return n + !(a & 0x0000000000000001ULL); |
| 144 | +} |
| 145 | + |
| 146 | +// Calculate both the quotient and remainder of the unsigned division of a and |
| 147 | +// b. The return value is the quotient, and the remainder is placed in variable |
| 148 | +// pointed to by c (if it's not NULL). |
| 149 | +arith64_u64 __divmoddi4(arith64_u64 a, arith64_u64 b, arith64_u64 *c) |
| 150 | +{ |
| 151 | + if (b > a) // divisor > numerator? |
| 152 | + { |
| 153 | + if (c) *c = a; // remainder = numerator |
| 154 | + return 0; // quotient = 0 |
| 155 | + } |
| 156 | + if (!arith64_hi(b)) // divisor is 32-bit |
| 157 | + { |
| 158 | + if (b == 0) // divide by 0 |
| 159 | + { |
| 160 | + volatile char x = 0; x = 1 / x; // force an exception |
| 161 | + } |
| 162 | + if (b == 1) // divide by 1 |
| 163 | + { |
| 164 | + if (c) *c = 0; // remainder = 0 |
| 165 | + return a; // quotient = numerator |
| 166 | + } |
| 167 | + if (!arith64_hi(a)) // numerator is also 32-bit |
| 168 | + { |
| 169 | + if (c) // use generic 32-bit operators |
| 170 | + *c = arith64_lo(a) % arith64_lo(b); |
| 171 | + return arith64_lo(a) / arith64_lo(b); |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + // let's do long division |
| 176 | + char bits = __clzdi2(b) - __clzdi2(a) + 1; // number of bits to iterate (a and b are non-zero) |
| 177 | + arith64_u64 rem = a >> bits; // init remainder |
| 178 | + a <<= 64 - bits; // shift numerator to the high bit |
| 179 | + arith64_u64 wrap = 0; // start with wrap = 0 |
| 180 | + while (bits-- > 0) // for each bit |
| 181 | + { |
| 182 | + rem = (rem << 1) | (a >> 63); // shift numerator MSB to remainder LSB |
| 183 | + a = (a << 1) | (wrap & 1); // shift out the numerator, shift in wrap |
| 184 | + wrap = ((arith64_s64)(b - rem - 1) >> 63); // wrap = (b > rem) ? 0 : 0xffffffffffffffff (via sign extension) |
| 185 | + rem -= b & wrap; // if (wrap) rem -= b |
| 186 | + } |
| 187 | + if (c) *c = rem; // maybe set remainder |
| 188 | + return (a << 1) | (wrap & 1); // return the quotient |
| 189 | +} |
| 190 | + |
| 191 | +// Return the quotient of the signed division of a and b. |
| 192 | +arith64_s64 __divdi3(arith64_s64 a, arith64_s64 b) |
| 193 | +{ |
| 194 | + arith64_u64 q = __divmoddi4(arith64_abs(a), arith64_abs(b), (void *)0); |
| 195 | + return arith64_neg(q, a^b); // negate q if a and b signs are different |
| 196 | +} |
| 197 | + |
| 198 | +// Return the index of the least significant 1-bit in a, or the value zero if a |
| 199 | +// is zero. The least significant bit is index one. |
| 200 | +int __ffsdi2(arith64_u64 a) |
| 201 | +{ |
| 202 | + return a ? __ctzdi2(a) + 1 : 0; |
| 203 | +} |
| 204 | + |
| 205 | +// Return the result of logically shifting a right by b bits. |
| 206 | +arith64_u64 __lshrdi3(arith64_u64 a, int b) |
| 207 | +{ |
| 208 | + arith64_word w = {.u64 = a}; |
| 209 | + |
| 210 | + b &= 63; |
| 211 | + |
| 212 | + if (b >= 32) |
| 213 | + { |
| 214 | + w.u32.lo = w.u32.hi >> (b - 32); |
| 215 | + w.u32.hi = 0; |
| 216 | + } else if (b) |
| 217 | + { |
| 218 | + w.u32.lo = (w.u32.hi << (32 - b)) | (w.u32.lo >> b); |
| 219 | + w.u32.hi >>= b; |
| 220 | + } |
| 221 | + return w.u64; |
| 222 | +} |
| 223 | + |
| 224 | +// Return the remainder of the signed division of a and b. |
| 225 | +arith64_s64 __moddi3(arith64_s64 a, arith64_s64 b) |
| 226 | +{ |
| 227 | + arith64_u64 r; |
| 228 | + __divmoddi4(arith64_abs(a), arith64_abs(b), &r); |
| 229 | + return arith64_neg(r, a); // negate remainder if numerator is negative |
| 230 | +} |
| 231 | + |
| 232 | +// Return the number of bits set in a. |
| 233 | +int __popcountsi2(arith64_u32 a) |
| 234 | +{ |
| 235 | + // collect sums into two low bytes |
| 236 | + a = a - ((a >> 1) & 0x55555555); |
| 237 | + a = ((a >> 2) & 0x33333333) + (a & 0x33333333); |
| 238 | + a = (a + (a >> 4)) & 0x0F0F0F0F; |
| 239 | + a = (a + (a >> 16)); |
| 240 | + // add the bytes, return bottom 6 bits |
| 241 | + return (a + (a >> 8)) & 63; |
| 242 | +} |
| 243 | + |
| 244 | +// Return the number of bits set in a. |
| 245 | +int __popcountdi2(arith64_u64 a) |
| 246 | +{ |
| 247 | + // collect sums into two low bytes |
| 248 | + a = a - ((a >> 1) & 0x5555555555555555ULL); |
| 249 | + a = ((a >> 2) & 0x3333333333333333ULL) + (a & 0x3333333333333333ULL); |
| 250 | + a = (a + (a >> 4)) & 0x0F0F0F0F0F0F0F0FULL; |
| 251 | + a = (a + (a >> 32)); |
| 252 | + a = (a + (a >> 16)); |
| 253 | + // add the bytes, return bottom 7 bits |
| 254 | + return (a + (a >> 8)) & 127; |
| 255 | +} |
| 256 | + |
| 257 | +// Return the quotient of the unsigned division of a and b. |
| 258 | +arith64_u64 __udivdi3(arith64_u64 a, arith64_u64 b) |
| 259 | +{ |
| 260 | + return __divmoddi4(a, b, (void *)0); |
| 261 | +} |
| 262 | + |
| 263 | +// Return the remainder of the unsigned division of a and b. |
| 264 | +arith64_u64 __umoddi3(arith64_u64 a, arith64_u64 b) |
| 265 | +{ |
| 266 | + arith64_u64 r; |
| 267 | + __divmoddi4(a, b, &r); |
| 268 | + return r; |
| 269 | +} |
0 commit comments