blob: b8a32d7568a29cf0d81abd1fc3063e8ca194c5fe [file] [log] [blame]
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
/**
* @file
* @brief Implementations of libgcc-style polyfills for arithmetic.
*
* This file has no header, since its functions should not be called directly;
* the compiler will generate calls into them as needed.
*
* The functions have names like `_ot_builtin_*`, rather than their libgcc
* names, so that they can coexist with libgcc/libcompiler-rt on the host-side
* for the purpose of unit tests. The linker aliases for the libgcc names only
* exist on the device-side.
*
* See https://github.com/llvm/llvm-project/tree/main/compiler-rt/lib/builtins
* for a detailed specification of the ABI we implement in this file.
*
* The provided functions here are:
* - 64-bit shifts.
* - 32-bit popcount, parity, bswap, clz, ctz, and find first.
*
* Although the RISC-V B extension provides instructions for some ofthese, we
* currently do not require using a Clang that is aware of how to codegen them,
* so LLVM may choose to emit libgcc polyfill symbols (like the following)
* instead. Once we mandate such a Clang, they should be removed.
*/
#include <stdint.h>
#include <stdnoreturn.h>
#include "sw/device/lib/base/macros.h"
/**
* Helper type for fussing with the upper and lower halves of an i64.
*
* This is necessary to avoid accidentally calling into the shift polyfills
* inside of their implementations.
*/
typedef union u32x2 {
struct {
uint32_t lo;
union {
uint32_t hi;
int32_t hi_signed;
};
};
int64_t full;
} u32x2_t;
int64_t _ot_builtin_lshift_i64(int64_t val, int32_t shift) {
if (shift == 0) {
return val;
}
u32x2_t word = {.full = val};
// When doing a big shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// bbbb'bb00'0000'0000
if (shift >= 32) {
word.hi = word.lo << (shift - 32);
word.lo = 0;
return word.full;
}
// When doing a small shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// aaaa'aabb'bbbb'bb00
word.hi <<= shift;
word.hi |= word.lo >> (32 - shift);
word.lo <<= shift;
return word.full;
}
int64_t _ot_builtin_rshift_i64(int64_t val, int32_t shift) {
if (shift == 0) {
return val;
}
u32x2_t word = {.full = val};
// When doing a big shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// 0000'0000'00aa'aaaa
if (shift >= 32) {
word.lo = word.hi >> (shift - 32);
word.hi = 0;
return word.full;
}
// When doing a small shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// 00aa'aaaa'aabb'bbbb
word.lo >>= shift;
word.lo |= word.hi << (32 - shift);
word.hi >>= shift;
return word.full;
}
int64_t _ot_builtin_ashift_i64(int64_t val, int32_t shift) {
if (shift == 0) {
return val;
}
u32x2_t word = {.full = val};
// When doing a big shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// ssss'ssss'ssaa'aaaa
//
// where s is sign bits.
if (shift >= 32) {
word.lo = word.hi_signed >> (shift - 32);
word.hi_signed >>= 31;
return word.full;
}
// When doing a small shift, we have
//
// aaaa'aaaa'bbbb'bbbb
// ssaa'aaaa'aabb'bbbb
//
// where s is sign bits.
word.lo >>= shift;
word.lo |= word.hi << (32 - shift);
word.hi_signed >>= shift;
return word.full;
}
int32_t _ot_builtin_bswap_i32(int32_t val) {
uint32_t x = val;
return (((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) |
((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24));
}
int _ot_builtin_popcount_i32(int32_t val) {
// Standard "sum of interleaved bits" algorithm for popcount.
//
// Each iteration, we take an array of [N x iM], mask them out into arrays of
// even and odd entries, shift the even one so it lines up with the odd one,
// and add them, making a [N/2 x i(M * 2)]. We start from a [32 x i1] and
// work towards a [1 x i32].
uint32_t i1x32 = val;
uint32_t i2x16 = (i1x32 & 0x55555555) + ((i1x32 >> 1) & 0x55555555);
uint32_t i4x8 = (i2x16 & 0x33333333) + ((i2x16 >> 2) & 0x33333333);
uint32_t i8x4 = (i4x8 & 0x0f0f0f0f) + ((i4x8 >> 4) & 0x0f0f0f0f);
uint32_t i16x2 = (i8x4 & 0x00ff00ff) + ((i8x4 >> 8) & 0x00ff00ff);
uint32_t i32x1 = (i16x2 & 0xffff) + ((i16x2 >> 16) & 0xffff);
// i32x1 will never be greater than 32, so it can be safely converted.
return (int)i32x1;
}
int _ot_builtin_parity_i32(int32_t val) {
// Parity can be implemented as XOR of all bits.
uint32_t x = val;
for (int i = 16; i > 0; i /= 2) {
x ^= x >> i;
}
return x & 1;
}
// NOTE: val != 0 is a precondition of the ABI.
int _ot_builtin_ctz_i32(int32_t val) {
// Binary search, adapted from Ch2 of Hacker's Delight.
uint32_t x = val;
int count = 0;
for (int i = 16; i > 0; i /= 2) {
int32_t mask = (1u << i) - 1;
if ((x & mask) == 0) {
count += i;
x >>= i;
}
}
return count;
}
// NOTE: val != 0 is a precondition of the ABI.
int _ot_builtin_clz_i32(int32_t val) {
// Binary search, adapted from Ch2 of Hacker's Delight.
uint32_t x = val;
int count = 0;
for (int i = 16; i > 0; i /= 2) {
int32_t mask = ((1u << i) - 1) << (32 - i);
if ((x & mask) == 0) {
count += i;
x <<= i;
}
}
return count;
}
int _ot_builtin_find_first_i32(int32_t val) {
if (val == 0) {
return 0;
}
// LSB is bit index 1.
return _ot_builtin_ctz_i32(val) + 1;
}
// Linker aliases for libgcc symbols.
#ifdef OT_PLATFORM_RV32
OT_WEAK
OT_ALIAS("_ot_builtin_lshift_i64")
int64_t __ashldi3(int64_t val, int32_t shift);
OT_WEAK
OT_ALIAS("_ot_builtin_rshift_i64")
int64_t __lshrdi3(int64_t val, int32_t shift);
OT_WEAK
OT_ALIAS("_ot_builtin_ashift_i64")
int64_t __ashrdi3(int64_t val, int32_t shift);
OT_WEAK
OT_ALIAS("_ot_builtin_bswap_i32")
int32_t __bswapsi2(int32_t val);
OT_WEAK
OT_ALIAS("_ot_builtin_popcount_i32")
int __popcountsi2(int32_t val);
OT_WEAK
OT_ALIAS("_ot_builtin_parity_i32")
int __paritysi2(int32_t val);
OT_WEAK
OT_ALIAS("_ot_builtin_ctz_i32")
int __ctzsi2(int32_t val);
OT_WEAK
OT_ALIAS("_ot_builtin_clz_i32")
int __clzsi2(int32_t val);
OT_WEAK
OT_ALIAS("_ot_builtin_find_first_i32")
int __ffssi2(int32_t val);
extern noreturn void
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451(void);
// "Trap" polyfills to catch uses of u64 divsion and display an "error" via
// the name of an undefined symbol.
//
// Of course, this depends on people linking this file in... but hopefully it
// is sufficiently pervasive that that won't be an issue; at any rate, it means
// people will land somewhere when they grep for __udivdi3 and friends.
OT_WEAK int64_t __divdi3(int64_t a, int64_t b) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
OT_WEAK uint64_t __udivdi3(uint64_t a, uint64_t b) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
OT_WEAK int64_t __moddi3(int64_t a, int64_t b) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
OT_WEAK uint64_t __umoddi3(uint64_t a, uint64_t b) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
OT_WEAK int64_t __divmoddi4(int64_t a, int64_t b, int64_t *rem) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
OT_WEAK uint64_t __udivmoddi4(uint64_t a, uint64_t b, uint64_t *rem) {
_ot_builtin_div64_intentionally_not_implemented_see_pull_11451();
}
#endif // OT_PLATFORM_RV32