| // Copyright lowRISC contributors. |
| // Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| // SPDX-License-Identifier: Apache-2.0 |
| |
| /** |
| * @file |
| * @brief Implementations of libgcc-style polyfills for arithmetic. |
| * |
| * This file has no header, since its functions should not be called directly; |
| * the compiler will generate calls into them as needed. |
| * |
| * The functions have names like `_ot_builtin_*`, rather than their libgcc |
| * names, so that they can coexist with libgcc/libcompiler-rt on the host-side |
| * for the purpose of unit tests. The linker aliases for the libgcc names only |
| * exist on the device-side. |
| * |
| * See https://github.com/llvm/llvm-project/tree/main/compiler-rt/lib/builtins |
| * for a detailed specification of the ABI we implement in this file. |
| * |
| * The provided functions here are: |
| * - 64-bit shifts. |
| * - 32-bit popcount, parity, bswap, clz, ctz, and find first. |
| * |
| * Although the RISC-V B extension provides instructions for some ofthese, we |
| * currently do not require using a Clang that is aware of how to codegen them, |
| * so LLVM may choose to emit libgcc polyfill symbols (like the following) |
| * instead. Once we mandate such a Clang, they should be removed. |
| */ |
| |
| #include <stdint.h> |
| #include <stdnoreturn.h> |
| |
| #include "sw/device/lib/base/macros.h" |
| |
| /** |
| * Helper type for fussing with the upper and lower halves of an i64. |
| * |
| * This is necessary to avoid accidentally calling into the shift polyfills |
| * inside of their implementations. |
| */ |
| typedef union u32x2 { |
| struct { |
| uint32_t lo; |
| union { |
| uint32_t hi; |
| int32_t hi_signed; |
| }; |
| }; |
| int64_t full; |
| } u32x2_t; |
| |
| int64_t _ot_builtin_lshift_i64(int64_t val, int32_t shift) { |
| if (shift == 0) { |
| return val; |
| } |
| |
| u32x2_t word = {.full = val}; |
| |
| // When doing a big shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // bbbb'bb00'0000'0000 |
| if (shift >= 32) { |
| word.hi = word.lo << (shift - 32); |
| word.lo = 0; |
| return word.full; |
| } |
| |
| // When doing a small shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // aaaa'aabb'bbbb'bb00 |
| word.hi <<= shift; |
| word.hi |= word.lo >> (32 - shift); |
| word.lo <<= shift; |
| return word.full; |
| } |
| |
| int64_t _ot_builtin_rshift_i64(int64_t val, int32_t shift) { |
| if (shift == 0) { |
| return val; |
| } |
| |
| u32x2_t word = {.full = val}; |
| |
| // When doing a big shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // 0000'0000'00aa'aaaa |
| if (shift >= 32) { |
| word.lo = word.hi >> (shift - 32); |
| word.hi = 0; |
| return word.full; |
| } |
| |
| // When doing a small shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // 00aa'aaaa'aabb'bbbb |
| word.lo >>= shift; |
| word.lo |= word.hi << (32 - shift); |
| word.hi >>= shift; |
| |
| return word.full; |
| } |
| |
| int64_t _ot_builtin_ashift_i64(int64_t val, int32_t shift) { |
| if (shift == 0) { |
| return val; |
| } |
| |
| u32x2_t word = {.full = val}; |
| |
| // When doing a big shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // ssss'ssss'ssaa'aaaa |
| // |
| // where s is sign bits. |
| if (shift >= 32) { |
| word.lo = word.hi_signed >> (shift - 32); |
| word.hi_signed >>= 31; |
| return word.full; |
| } |
| |
| // When doing a small shift, we have |
| // |
| // aaaa'aaaa'bbbb'bbbb |
| // ssaa'aaaa'aabb'bbbb |
| // |
| // where s is sign bits. |
| word.lo >>= shift; |
| word.lo |= word.hi << (32 - shift); |
| word.hi_signed >>= shift; |
| |
| return word.full; |
| } |
| |
| int32_t _ot_builtin_bswap_i32(int32_t val) { |
| uint32_t x = val; |
| return (((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) | |
| ((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24)); |
| } |
| |
| int _ot_builtin_popcount_i32(int32_t val) { |
| // Standard "sum of interleaved bits" algorithm for popcount. |
| // |
| // Each iteration, we take an array of [N x iM], mask them out into arrays of |
| // even and odd entries, shift the even one so it lines up with the odd one, |
| // and add them, making a [N/2 x i(M * 2)]. We start from a [32 x i1] and |
| // work towards a [1 x i32]. |
| uint32_t i1x32 = val; |
| uint32_t i2x16 = (i1x32 & 0x55555555) + ((i1x32 >> 1) & 0x55555555); |
| uint32_t i4x8 = (i2x16 & 0x33333333) + ((i2x16 >> 2) & 0x33333333); |
| uint32_t i8x4 = (i4x8 & 0x0f0f0f0f) + ((i4x8 >> 4) & 0x0f0f0f0f); |
| uint32_t i16x2 = (i8x4 & 0x00ff00ff) + ((i8x4 >> 8) & 0x00ff00ff); |
| uint32_t i32x1 = (i16x2 & 0xffff) + ((i16x2 >> 16) & 0xffff); |
| |
| // i32x1 will never be greater than 32, so it can be safely converted. |
| return (int)i32x1; |
| } |
| |
| int _ot_builtin_parity_i32(int32_t val) { |
| // Parity can be implemented as XOR of all bits. |
| uint32_t x = val; |
| for (int i = 16; i > 0; i /= 2) { |
| x ^= x >> i; |
| } |
| return x & 1; |
| } |
| |
| // NOTE: val != 0 is a precondition of the ABI. |
| int _ot_builtin_ctz_i32(int32_t val) { |
| // Binary search, adapted from Ch2 of Hacker's Delight. |
| uint32_t x = val; |
| int count = 0; |
| for (int i = 16; i > 0; i /= 2) { |
| int32_t mask = (1u << i) - 1; |
| if ((x & mask) == 0) { |
| count += i; |
| x >>= i; |
| } |
| } |
| return count; |
| } |
| |
| // NOTE: val != 0 is a precondition of the ABI. |
| int _ot_builtin_clz_i32(int32_t val) { |
| // Binary search, adapted from Ch2 of Hacker's Delight. |
| uint32_t x = val; |
| int count = 0; |
| for (int i = 16; i > 0; i /= 2) { |
| int32_t mask = ((1u << i) - 1) << (32 - i); |
| if ((x & mask) == 0) { |
| count += i; |
| x <<= i; |
| } |
| } |
| return count; |
| } |
| |
| int _ot_builtin_find_first_i32(int32_t val) { |
| if (val == 0) { |
| return 0; |
| } |
| |
| // LSB is bit index 1. |
| return _ot_builtin_ctz_i32(val) + 1; |
| } |
| |
| // Linker aliases for libgcc symbols. |
| #ifdef OT_PLATFORM_RV32 |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_lshift_i64") |
| int64_t __ashldi3(int64_t val, int32_t shift); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_rshift_i64") |
| int64_t __lshrdi3(int64_t val, int32_t shift); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_ashift_i64") |
| int64_t __ashrdi3(int64_t val, int32_t shift); |
| |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_bswap_i32") |
| int32_t __bswapsi2(int32_t val); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_popcount_i32") |
| int __popcountsi2(int32_t val); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_parity_i32") |
| int __paritysi2(int32_t val); |
| |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_ctz_i32") |
| int __ctzsi2(int32_t val); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_clz_i32") |
| int __clzsi2(int32_t val); |
| OT_WEAK |
| OT_ALIAS("_ot_builtin_find_first_i32") |
| int __ffssi2(int32_t val); |
| |
| extern noreturn void |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(void); |
| |
| // "Trap" polyfills to catch uses of u64 divsion and display an "error" via |
| // the name of an undefined symbol. |
| // |
| // Of course, this depends on people linking this file in... but hopefully it |
| // is sufficiently pervasive that that won't be an issue; at any rate, it means |
| // people will land somewhere when they grep for __udivdi3 and friends. |
| OT_WEAK int64_t __divdi3(int64_t a, int64_t b) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| OT_WEAK uint64_t __udivdi3(uint64_t a, uint64_t b) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| OT_WEAK int64_t __moddi3(int64_t a, int64_t b) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| OT_WEAK uint64_t __umoddi3(uint64_t a, uint64_t b) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| OT_WEAK int64_t __divmoddi4(int64_t a, int64_t b, int64_t *rem) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| OT_WEAK uint64_t __udivmoddi4(uint64_t a, uint64_t b, uint64_t *rem) { |
| _ot_builtin_div64_intentionally_not_implemented_see_pull_11451(); |
| } |
| #endif // OT_PLATFORM_RV32 |