// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Implementation of the primitives from stdalign.h used for cross-target
// value alignment specification and queries.
#ifndef IREE_BASE_ALIGNMENT_H_
#define IREE_BASE_ALIGNMENT_H_
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "iree/base/attributes.h"
#include "iree/base/config.h"
#include "iree/base/target_platform.h"
#ifdef __cplusplus
extern "C" {
#endif
//==============================================================================
// IREE_PTR_SIZE_*
//==============================================================================
// Verify that the pointer size of the machine matches the expectation that
// uintptr_t can round-trip the value. This isn't a common issue unless the
// toolchain is doing weird things.
// See https://stackoverflow.com/q/51616057.
static_assert(sizeof(void*) == sizeof(uintptr_t),
"can't determine pointer size");
#if UINTPTR_MAX == 0xFFFFFFFF
#define IREE_PTR_SIZE_32
#define IREE_PTR_SIZE 4
#elif UINTPTR_MAX == 0xFFFFFFFFFFFFFFFFu
#define IREE_PTR_SIZE_64
#define IREE_PTR_SIZE 8
#else
#error "can't determine pointer size"
#endif
//===----------------------------------------------------------------------===//
// Alignment utilities
//===----------------------------------------------------------------------===//
// Returns the number of elements in an array as a compile-time constant, which
// can be used in defining new arrays. Similar to `countof()`.
//
// NOTE: only use this on static arrays; when passed a pointer it does not
// reliably fail at compile time and instead yields a meaningless value.
//
// Example:
// uint8_t kConstantArray[512];
// assert(IREE_ARRAYSIZE(kConstantArray) == 512);
#define IREE_ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#define iree_min(lhs, rhs) ((lhs) <= (rhs) ? (lhs) : (rhs))
#define iree_max(lhs, rhs) ((lhs) <= (rhs) ? (rhs) : (lhs))
// https://en.cppreference.com/w/c/types/max_align_t
#if defined(IREE_PLATFORM_WINDOWS)
// NOTE: 16 is a specified Microsoft API requirement for some functions.
#define iree_max_align_t 16
#else
#define iree_max_align_t sizeof(long double)
#endif // IREE_PLATFORM_*
// https://en.cppreference.com/w/c/language/_Alignas
// https://en.cppreference.com/w/c/language/_Alignof
#if defined(IREE_COMPILER_MSVC)
#define iree_alignas(x) __declspec(align(x))
#define iree_alignof(x) __alignof(x)
#else
#define iree_alignas(x) __attribute__((__aligned__(x)))
#define iree_alignof(x) __alignof__(x)
#endif // IREE_COMPILER_*
// Aligns |value| up to the given power-of-two |alignment| if required.
// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
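// Example (illustrative):
//   iree_host_align(10, 8);  // == 16 (rounded up to the next multiple of 8)
//   iree_host_align(16, 8);  // == 16 (already aligned; unchanged)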
static inline iree_host_size_t iree_host_align(iree_host_size_t value,
iree_host_size_t alignment) {
return (value + (alignment - 1)) & ~(alignment - 1);
}
// Returns true if |value| is a power-of-two.
static inline bool iree_host_size_is_power_of_two(iree_host_size_t value) {
return (value != 0) && ((value & (value - 1)) == 0);
}
// Returns true if |value| matches the given minimum |alignment|.
static inline bool iree_host_size_has_alignment(iree_host_size_t value,
iree_host_size_t alignment) {
return iree_host_align(value, alignment) == value;
}
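// Example (illustrative) using the host-size queries above:
//   iree_host_size_is_power_of_two(64);    // true
//   iree_host_size_is_power_of_two(24);    // false
//   iree_host_size_has_alignment(64, 16);  // true (64 is a multiple of 16)
//   iree_host_size_has_alignment(20, 16);  // false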
// Aligns |value| up to the given power-of-two |alignment| if required.
// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
static inline iree_device_size_t iree_device_align(
iree_device_size_t value, iree_device_size_t alignment) {
return (value + (alignment - 1)) & ~(alignment - 1);
}
// Returns true if |value| is a power-of-two.
static inline bool iree_device_size_is_power_of_two(iree_device_size_t value) {
return (value != 0) && ((value & (value - 1)) == 0);
}
// Returns true if |value| matches the given minimum |alignment|.
static inline bool iree_device_size_has_alignment(
iree_device_size_t value, iree_device_size_t alignment) {
return iree_device_align(value, alignment) == value;
}
// Returns true if |value| is a power-of-two.
static inline bool iree_is_power_of_two_uint64(uint64_t value) {
return (value != 0) && ((value & (value - 1)) == 0);
}
// Aligns |value| up to the given power-of-two |alignment| if required.
// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
static inline uint64_t iree_align_uint64(uint64_t value, uint64_t alignment) {
return (value + (alignment - 1)) & ~(alignment - 1);
}
// Returns the size of a struct padded out to iree_max_align_t.
// This must be used when performing manual trailing allocation packing to
// ensure the alignment requirements of the trailing data are satisfied.
//
// NOTE: do not use this with flexible array members (`struct { int trailing[]; }`);
// those must use the plain sizeof(t) as the compiler performs the trailing
// padding for you.
//
// Example:
// some_buffer_ptr_t* p = NULL;
// iree_host_size_t total_size = iree_sizeof_struct(*p) + extra_data_size;
// IREE_CHECK_OK(iree_allocator_malloc(allocator, total_size, (void**)&p));
#define iree_sizeof_struct(t) iree_host_align(sizeof(t), iree_max_align_t)
// Returns the ceil-divide of |lhs| by non-zero |rhs|.
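// Example (illustrative):
//   iree_host_size_ceil_div(10, 4);  // == 3
//   iree_host_size_ceil_div(12, 4);  // == 3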
static inline iree_host_size_t iree_host_size_ceil_div(iree_host_size_t lhs,
iree_host_size_t rhs) {
return ((lhs != 0) && (lhs > 0) == (rhs > 0))
? ((lhs + ((rhs > 0) ? -1 : 1)) / rhs) + 1
: -(-lhs / rhs);
}
// Returns the floor-divide of |lhs| by non-zero |rhs|.
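// Example (illustrative):
//   iree_host_size_floor_div(10, 4);  // == 2
//   iree_host_size_floor_div(12, 4);  // == 3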
static inline iree_host_size_t iree_host_size_floor_div(iree_host_size_t lhs,
iree_host_size_t rhs) {
return ((lhs != 0) && ((lhs < 0) != (rhs < 0)))
? -((-lhs + ((rhs < 0) ? 1 : -1)) / rhs) - 1
: lhs / rhs;
}
// Returns the ceil-divide of |lhs| by non-zero |rhs|.
static inline iree_device_size_t iree_device_size_ceil_div(
iree_device_size_t lhs, iree_device_size_t rhs) {
return ((lhs != 0) && (lhs > 0) == (rhs > 0))
? ((lhs + ((rhs > 0) ? -1 : 1)) / rhs) + 1
: -(-lhs / rhs);
}
// Returns the floor-divide of |lhs| by non-zero |rhs|.
static inline iree_device_size_t iree_device_size_floor_div(
iree_device_size_t lhs, iree_device_size_t rhs) {
return ((lhs != 0) && ((lhs < 0) != (rhs < 0)))
? -((-lhs + ((rhs < 0) ? 1 : -1)) / rhs) - 1
: lhs / rhs;
}
// Returns the greatest common divisor between two values.
//
// See: https://en.wikipedia.org/wiki/Greatest_common_divisor
//
// Examples:
// gcd(8, 16) = 8
// gcd(3, 5) = 1
static inline iree_device_size_t iree_device_size_gcd(iree_device_size_t a,
iree_device_size_t b) {
if (b == 0) return a;
return iree_device_size_gcd(b, a % b);
}
// Returns the least common multiple between two values, often used for
// finding a common alignment.
//
// See: https://en.wikipedia.org/wiki/Least_common_multiple
//
// Examples:
// lcm(8, 16) = 16
// lcm(3, 5) = 15 (15 % 3 == 0, 15 % 5 == 0)
static inline iree_device_size_t iree_device_size_lcm(iree_device_size_t a,
iree_device_size_t b) {
return a * (b / iree_device_size_gcd(a, b));
}
//===----------------------------------------------------------------------===//
// Byte and page range manipulation
//===----------------------------------------------------------------------===//
// Defines a range of bytes with any arbitrary alignment.
// Most operations will adjust this range by the allocation granularity, meaning
// that a range straddling a page boundary specifies multiple pages (such as
// offset=1, length=4096 with a page size of 4096 indicating 2 pages).
typedef struct iree_byte_range_t {
iree_host_size_t offset;
iree_host_size_t length;
} iree_byte_range_t;
// Defines a range of bytes with page-appropriate alignment.
// Any operation taking page ranges requires that the offset and length respect
// the size and granularity requirements of the page mode the memory was defined
// with. For example, if an allocation is using large pages then both offset and
// length must be multiples of the iree_memory_info_t::large_page_granularity.
typedef struct iree_page_range_t {
iree_host_size_t offset;
iree_host_size_t length;
} iree_page_range_t;
// Aligns |addr| down to |page_alignment|.
static inline uintptr_t iree_page_align_start(uintptr_t addr,
iree_host_size_t page_alignment) {
return addr & (~(page_alignment - 1));
}
// Aligns |addr| up to |page_alignment|.
static inline uintptr_t iree_page_align_end(uintptr_t addr,
iree_host_size_t page_alignment) {
return iree_page_align_start(addr + (page_alignment - 1), page_alignment);
}
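// Example (illustrative, with a 4096-byte page size):
//   iree_page_align_start(4097, 4096);  // == 4096
//   iree_page_align_end(4097, 4096);    // == 8192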
// Unions two page ranges to create the min/max extents of both.
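// Example (illustrative):
//   iree_page_range_t a = {/*.offset=*/0, /*.length=*/4096};
//   iree_page_range_t b = {/*.offset=*/8192, /*.length=*/4096};
//   iree_page_range_union(a, b);  // offset=0, length=12288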
static inline iree_page_range_t iree_page_range_union(
const iree_page_range_t a, const iree_page_range_t b) {
iree_host_size_t start = iree_min(a.offset, b.offset);
iree_host_size_t end = iree_max(a.offset + a.length, b.offset + b.length);
return (iree_page_range_t){
/*.offset=*/start,
/*.length=*/end - start,
};
}
// Aligns a byte range to page boundaries defined by |page_alignment|.
static inline iree_page_range_t iree_align_byte_range_to_pages(
const iree_byte_range_t byte_range, iree_host_size_t page_alignment) {
return (iree_page_range_t){
/*.offset=*/iree_host_align(byte_range.offset, page_alignment),
/*.length=*/iree_host_align(byte_range.length, page_alignment),
};
}
// Computes a page-aligned base address and total length from a byte range.
// The returned start address will be <= |base_address| + range.offset and the
// returned length will be >= range.length.
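// Example (illustrative, assuming 4096-byte pages and a hypothetical
// |base_ptr| pointing at the start of the mapped allocation):
//   void* start = NULL;
//   iree_host_size_t length = 0;
//   iree_byte_range_t range = {/*.offset=*/10, /*.length=*/100};
//   iree_page_align_range(base_ptr, range, 4096, &start, &length);
//   // |start| <= base_ptr + 10; |length| >= 100; both page-aligned.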
static inline void iree_page_align_range(void* base_address,
iree_byte_range_t range,
iree_host_size_t page_alignment,
void** out_start_address,
iree_host_size_t* out_aligned_length) {
void* range_start = (void*)iree_page_align_start(
(uintptr_t)base_address + range.offset, page_alignment);
void* range_end = (void*)iree_page_align_end(
(uintptr_t)base_address + range.offset + range.length, page_alignment);
*out_start_address = range_start;
  *out_aligned_length =
      (iree_host_size_t)((uintptr_t)range_end - (uintptr_t)range_start);
}
//===----------------------------------------------------------------------===//
// Alignment intrinsics
//===----------------------------------------------------------------------===//
#if IREE_HAVE_BUILTIN(__builtin_unreachable) || defined(__GNUC__)
#define IREE_BUILTIN_UNREACHABLE() __builtin_unreachable()
#elif defined(IREE_COMPILER_MSVC)
#define IREE_BUILTIN_UNREACHABLE() __assume(false)
#else
#define IREE_BUILTIN_UNREACHABLE() ((void)0)
#endif // IREE_HAVE_BUILTIN(__builtin_unreachable) || defined(__GNUC__)
#if !defined(__cplusplus)
#define IREE_DECLTYPE(v) __typeof__(v)
#else
#define IREE_DECLTYPE(v) decltype(v)
#endif // __cplusplus
#if IREE_HAVE_BUILTIN(__builtin_assume_aligned) || defined(__GNUC__)
// NOTE: gcc only attaches the assumption to the returned pointer so we assign
// the result back to |ptr|.
#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
(ptr = (IREE_DECLTYPE(ptr))(__builtin_assume_aligned((void*)(ptr), (size))))
#elif 0 // defined(IREE_COMPILER_MSVC)
#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
(__assume((((uintptr_t)(ptr)) & ((1 << (size))) - 1)) == 0)
#else
#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
((((uintptr_t)(ptr) % (size)) == 0) ? (ptr) \
: (IREE_BUILTIN_UNREACHABLE(), (ptr)))
#endif // IREE_HAVE_BUILTIN(__builtin_assume_aligned) || defined(__GNUC__)
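// Example (illustrative; |data| is a hypothetical float* known by construction
// to be 16-byte aligned; the alignment is expected to be a power of two):
//   IREE_BUILTIN_ASSUME_ALIGNED(data, 16);
//   data[0] = 1.0f;  // the compiler may now assume a 16-byte-aligned access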
//===----------------------------------------------------------------------===//
// Alignment-safe memory accesses
//===----------------------------------------------------------------------===//
// Map little-endian byte indices in memory to the host memory order indices.
#if defined(IREE_ENDIANNESS_LITTLE)
#define IREE_LE_IDX_1(i) (i)
#define IREE_LE_IDX_2(i) (i)
#define IREE_LE_IDX_4(i) (i)
#define IREE_LE_IDX_8(i) (i)
#else
#define IREE_LE_IDX_1(i) (i)
#define IREE_LE_IDX_2(i) (1 - (i))
#define IREE_LE_IDX_4(i) (3 - (i))
#define IREE_LE_IDX_8(i) (7 - (i))
#endif // IREE_ENDIANNESS_*
#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8
static inline uint8_t iree_unaligned_load_le_u8(const uint8_t* ptr) {
return *ptr;
}
static inline void iree_unaligned_store_le_u8(uint8_t* ptr, uint8_t value) {
*ptr = value;
}
#else
#if defined(IREE_ENDIANNESS_LITTLE)
#define iree_unaligned_load_le_u8(ptr) *(ptr)
#define iree_unaligned_store_le_u8(ptr, value) *(ptr) = (value)
#else
#error "TODO(benvanik): little-endian load/store for big-endian archs"
#endif // IREE_ENDIANNESS_*
#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8
#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16
static inline uint16_t iree_unaligned_load_le_u16(const uint16_t* ptr) {
const uint8_t* p = (const uint8_t*)ptr;
return ((uint16_t)p[IREE_LE_IDX_2(0)]) | ((uint16_t)p[IREE_LE_IDX_2(1)] << 8);
}
static inline void iree_unaligned_store_le_u16(uint16_t* ptr, uint16_t value) {
uint8_t* p = (uint8_t*)ptr;
p[IREE_LE_IDX_2(0)] = value;
p[IREE_LE_IDX_2(1)] = value >> 8;
}
#else
#if defined(IREE_ENDIANNESS_LITTLE)
#define iree_unaligned_load_le_u16(ptr) *(ptr)
#define iree_unaligned_store_le_u16(ptr, value) *(ptr) = (value)
#else
#error "TODO(benvanik): little-endian load/store for big-endian archs"
#endif // IREE_ENDIANNESS_*
#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16
#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32
static inline uint32_t iree_unaligned_load_le_u32(const uint32_t* ptr) {
const uint8_t* p = (const uint8_t*)ptr;
return ((uint32_t)p[IREE_LE_IDX_4(0)]) |
((uint32_t)p[IREE_LE_IDX_4(1)] << 8) |
((uint32_t)p[IREE_LE_IDX_4(2)] << 16) |
((uint32_t)p[IREE_LE_IDX_4(3)] << 24);
}
static inline float iree_unaligned_load_le_f32(const float* ptr) {
uint32_t uint_value = iree_unaligned_load_le_u32((const uint32_t*)ptr);
float value;
memcpy(&value, &uint_value, sizeof(value));
return value;
}
static inline void iree_unaligned_store_le_u32(uint32_t* ptr, uint32_t value) {
uint8_t* p = (uint8_t*)ptr;
p[IREE_LE_IDX_4(0)] = value;
p[IREE_LE_IDX_4(1)] = value >> 8;
p[IREE_LE_IDX_4(2)] = value >> 16;
p[IREE_LE_IDX_4(3)] = value >> 24;
}
static inline void iree_unaligned_store_le_f32(float* ptr, float value) {
uint32_t uint_value;
memcpy(&uint_value, &value, sizeof(value));
iree_unaligned_store_le_u32((uint32_t*)ptr, uint_value);
}
#else
#if defined(IREE_ENDIANNESS_LITTLE)
#define iree_unaligned_load_le_u32(ptr) *(ptr)
#define iree_unaligned_load_le_f32(ptr) *(ptr)
#define iree_unaligned_store_le_u32(ptr, value) *(ptr) = (value)
#define iree_unaligned_store_le_f32(ptr, value) *(ptr) = (value)
#else
#error "TODO(benvanik): little-endian load/store for big-endian archs"
#endif // IREE_ENDIANNESS_*
#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32
#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64
static inline uint64_t iree_unaligned_load_le_u64(const uint64_t* ptr) {
const uint8_t* p = (const uint8_t*)ptr;
return ((uint64_t)p[IREE_LE_IDX_8(0)]) |
((uint64_t)p[IREE_LE_IDX_8(1)] << 8) |
((uint64_t)p[IREE_LE_IDX_8(2)] << 16) |
((uint64_t)p[IREE_LE_IDX_8(3)] << 24) |
((uint64_t)p[IREE_LE_IDX_8(4)] << 32) |
((uint64_t)p[IREE_LE_IDX_8(5)] << 40) |
((uint64_t)p[IREE_LE_IDX_8(6)] << 48) |
((uint64_t)p[IREE_LE_IDX_8(7)] << 56);
}
static inline double iree_unaligned_load_le_f64(const double* ptr) {
uint64_t uint_value = iree_unaligned_load_le_u64((const uint64_t*)ptr);
double value;
memcpy(&value, &uint_value, sizeof(value));
return value;
}
static inline void iree_unaligned_store_le_u64(uint64_t* ptr, uint64_t value) {
uint8_t* p = (uint8_t*)ptr;
p[IREE_LE_IDX_8(0)] = value;
p[IREE_LE_IDX_8(1)] = value >> 8;
p[IREE_LE_IDX_8(2)] = value >> 16;
p[IREE_LE_IDX_8(3)] = value >> 24;
p[IREE_LE_IDX_8(4)] = value >> 32;
p[IREE_LE_IDX_8(5)] = value >> 40;
p[IREE_LE_IDX_8(6)] = value >> 48;
p[IREE_LE_IDX_8(7)] = value >> 56;
}
static inline void iree_unaligned_store_le_f64(double* ptr, double value) {
uint64_t uint_value;
memcpy(&uint_value, &value, sizeof(value));
iree_unaligned_store_le_u64((uint64_t*)ptr, uint_value);
}
#else
#if defined(IREE_ENDIANNESS_LITTLE)
#define iree_unaligned_load_le_u64(ptr) *(ptr)
#define iree_unaligned_load_le_f64(ptr) *(ptr)
#define iree_unaligned_store_le_u64(ptr, value) *(ptr) = (value)
#define iree_unaligned_store_le_f64(ptr, value) *(ptr) = (value)
#else
#error "TODO(benvanik): little-endian load/store for big-endian archs"
#endif // IREE_ENDIANNESS_*
#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64
// clang-format off
// Dereferences |ptr| and returns the value.
// Automatically handles unaligned accesses on architectures that may not
// support them natively (or efficiently). Memory is treated as little-endian.
#define iree_unaligned_load_le(ptr) \
_Generic((ptr), \
int8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
uint8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
int16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
uint16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
int32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
uint32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
int64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
uint64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
float*: iree_unaligned_load_le_f32((const float*)(ptr)), \
double*: iree_unaligned_load_le_f64((const double*)(ptr)) \
)
// Dereferences |ptr| and writes the given |value|.
// Automatically handles unaligned accesses on architectures that may not
// support them natively (or efficiently). Memory is treated as little-endian.
#define iree_unaligned_store(ptr, value) \
_Generic((ptr), \
int8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
uint8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
int16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
uint16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
int32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
uint32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
int64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
uint64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
float*: iree_unaligned_store_le_f32((float*)(ptr), value), \
double*: iree_unaligned_store_le_f64((double*)(ptr), value) \
)
// clang-format on
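// Example (illustrative; |buffer| is a hypothetical uint8_t* and |offset| an
// arbitrary, possibly unaligned byte offset):
//   uint32_t value = iree_unaligned_load_le((uint32_t*)(buffer + offset));
//   iree_unaligned_store((uint32_t*)(buffer + offset), value + 1);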
#ifdef __cplusplus
} // extern "C"
#endif
#endif // IREE_BASE_ALIGNMENT_H_