Adds iree_unaligned_load+store to emulate unaligned access. (#6573)
Possible fix for #6566.
diff --git a/iree/base/alignment.h b/iree/base/alignment.h
index 215db3c..5229d97 100644
--- a/iree/base/alignment.h
+++ b/iree/base/alignment.h
@@ -11,6 +11,8 @@
#define IREE_BASE_ALIGNMENT_H_
#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
#include "iree/base/config.h"
#include "iree/base/target_platform.h"
@@ -69,6 +71,164 @@
// IREE_CHECK_OK(iree_allocator_malloc(allocator, total_size, (void**)&p));
#define iree_sizeof_struct(t) iree_host_align(sizeof(t), iree_max_align_t)
+//===----------------------------------------------------------------------===//
+// Alignment-safe memory accesses
+//===----------------------------------------------------------------------===//
+
+// Map little-endian byte indices in memory to the host memory order indices.
+#if defined(IREE_ENDIANNESS_LITTLE)
+#define IREE_LE_IDX_1(i) (i)
+#define IREE_LE_IDX_2(i) (i)
+#define IREE_LE_IDX_4(i) (i)
+#define IREE_LE_IDX_8(i) (i)
+#else
+#define IREE_LE_IDX_1(i) (i)
+#define IREE_LE_IDX_2(i) (1 - (i))
+#define IREE_LE_IDX_4(i) (3 - (i))
+#define IREE_LE_IDX_8(i) (7 - (i))
+#endif // IREE_ENDIANNESS_*
+
+#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+
+static inline uint8_t iree_unaligned_load_le_u8(const uint8_t* ptr) {
+ return *ptr;
+}
+static inline uint16_t iree_unaligned_load_le_u16(const uint16_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint16_t)p[IREE_LE_IDX_2(0)]) | ((uint16_t)p[IREE_LE_IDX_2(1)] << 8);
+}
+static inline uint32_t iree_unaligned_load_le_u32(const uint32_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint32_t)p[IREE_LE_IDX_4(0)]) |
+ ((uint32_t)p[IREE_LE_IDX_4(1)] << 8) |
+ ((uint32_t)p[IREE_LE_IDX_4(2)] << 16) |
+ ((uint32_t)p[IREE_LE_IDX_4(3)] << 24);
+}
+static inline uint64_t iree_unaligned_load_le_u64(const uint64_t* ptr) {
+ const uint8_t* p = (const uint8_t*)ptr;
+ return ((uint64_t)p[IREE_LE_IDX_8(0)]) |
+ ((uint64_t)p[IREE_LE_IDX_8(1)] << 8) |
+ ((uint64_t)p[IREE_LE_IDX_8(2)] << 16) |
+ ((uint64_t)p[IREE_LE_IDX_8(3)] << 24) |
+ ((uint64_t)p[IREE_LE_IDX_8(4)] << 32) |
+ ((uint64_t)p[IREE_LE_IDX_8(5)] << 40) |
+ ((uint64_t)p[IREE_LE_IDX_8(6)] << 48) |
+ ((uint64_t)p[IREE_LE_IDX_8(7)] << 56);
+}
+static inline float iree_unaligned_load_le_f32(const float* ptr) {
+ uint32_t uint_value = iree_unaligned_load_le_u32((const uint32_t*)ptr);
+ float value;
+ memcpy(&value, &uint_value, sizeof(value));
+ return value;
+}
+static inline double iree_unaligned_load_le_f64(const double* ptr) {
+ uint64_t uint_value = iree_unaligned_load_le_u64((const uint64_t*)ptr);
+ double value;
+ memcpy(&value, &uint_value, sizeof(value));
+ return value;
+}
+
+static inline void iree_unaligned_store_le_u8(uint8_t* ptr, uint8_t value) {
+ *ptr = value;
+}
+static inline void iree_unaligned_store_le_u16(uint16_t* ptr, uint16_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_2(0)] = value;
+ p[IREE_LE_IDX_2(1)] = value >> 8;
+}
+static inline void iree_unaligned_store_le_u32(uint32_t* ptr, uint32_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_4(0)] = value;
+ p[IREE_LE_IDX_4(1)] = value >> 8;
+ p[IREE_LE_IDX_4(2)] = value >> 16;
+ p[IREE_LE_IDX_4(3)] = value >> 24;
+}
+static inline void iree_unaligned_store_le_u64(uint64_t* ptr, uint64_t value) {
+ uint8_t* p = (uint8_t*)ptr;
+ p[IREE_LE_IDX_8(0)] = value;
+ p[IREE_LE_IDX_8(1)] = value >> 8;
+ p[IREE_LE_IDX_8(2)] = value >> 16;
+ p[IREE_LE_IDX_8(3)] = value >> 24;
+ p[IREE_LE_IDX_8(4)] = value >> 32;
+ p[IREE_LE_IDX_8(5)] = value >> 40;
+ p[IREE_LE_IDX_8(6)] = value >> 48;
+ p[IREE_LE_IDX_8(7)] = value >> 56;
+}
+static inline void iree_unaligned_store_le_f32(float* ptr, float value) {
+ uint32_t uint_value;
+ memcpy(&uint_value, &value, sizeof(value));
+ iree_unaligned_store_le_u32((uint32_t*)ptr, uint_value);
+}
+static inline void iree_unaligned_store_le_f64(double* ptr, double value) {
+ uint64_t uint_value;
+ memcpy(&uint_value, &value, sizeof(value));
+ iree_unaligned_store_le_u64((uint64_t*)ptr, uint_value);
+}
+
+#else
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+
+#define iree_unaligned_load_le_u8(ptr) (*(ptr))
+#define iree_unaligned_load_le_u16(ptr) (*(ptr))
+#define iree_unaligned_load_le_u32(ptr) (*(ptr))
+#define iree_unaligned_load_le_u64(ptr) (*(ptr))
+#define iree_unaligned_load_le_f32(ptr) (*(ptr))
+#define iree_unaligned_load_le_f64(ptr) (*(ptr))
+
+#define iree_unaligned_store_le_u8(ptr, value) (*(ptr) = (value))
+#define iree_unaligned_store_le_u16(ptr, value) (*(ptr) = (value))
+#define iree_unaligned_store_le_u32(ptr, value) (*(ptr) = (value))
+#define iree_unaligned_store_le_u64(ptr, value) (*(ptr) = (value))
+#define iree_unaligned_store_le_f32(ptr, value) (*(ptr) = (value))
+#define iree_unaligned_store_le_f64(ptr, value) (*(ptr) = (value))
+
+#else
+
+#error "TODO(benvanik): little-endian load/store for big-endian archs"
+
+#endif // IREE_ENDIANNESS_*
+
+#endif // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+
+// clang-format off
+
+// Dereferences |ptr| and returns the value.
+// Automatically handles unaligned accesses on architectures that may not
+// support them natively (or efficiently). Memory is treated as little-endian.
+#define iree_unaligned_load_le(ptr) \
+ _Generic((ptr), \
+ int8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
+ uint8_t*: iree_unaligned_load_le_u8((const uint8_t*)(ptr)), \
+ int16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
+ uint16_t*: iree_unaligned_load_le_u16((const uint16_t*)(ptr)), \
+ int32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
+ uint32_t*: iree_unaligned_load_le_u32((const uint32_t*)(ptr)), \
+ int64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
+ uint64_t*: iree_unaligned_load_le_u64((const uint64_t*)(ptr)), \
+ float*: iree_unaligned_load_le_f32((const float*)(ptr)), \
+ double*: iree_unaligned_load_le_f64((const double*)(ptr)) \
+ )
+
+// Dereferences |ptr| and writes the given |value|.
+// Automatically handles unaligned accesses on architectures that may not
+// support them natively (or efficiently). Memory is treated as little-endian.
+#define iree_unaligned_store(ptr, value) \
+ _Generic((ptr), \
+ int8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
+ uint8_t*: iree_unaligned_store_le_u8((uint8_t*)(ptr), value), \
+ int16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
+ uint16_t*: iree_unaligned_store_le_u16((uint16_t*)(ptr), value), \
+ int32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
+ uint32_t*: iree_unaligned_store_le_u32((uint32_t*)(ptr), value), \
+ int64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
+ uint64_t*: iree_unaligned_store_le_u64((uint64_t*)(ptr), value), \
+ float*: iree_unaligned_store_le_f32((float*)(ptr), value), \
+ double*: iree_unaligned_store_le_f64((double*)(ptr), value) \
+ )
+
+// clang-format on
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/iree/base/target_platform.h b/iree/base/target_platform.h
index 4277d71..8052967 100644
--- a/iree/base/target_platform.h
+++ b/iree/base/target_platform.h
@@ -30,6 +30,8 @@
// IREE_ENDIANNESS_LITTLE
// IREE_ENDIANNESS_BIG
//
+// IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED (0/1)
+//
// IREE_COMPILER_CLANG
// IREE_COMPILER_GCC
// IREE_COMPILER_GCC_COMPAT
@@ -138,6 +140,39 @@
#endif // __BYTE_ORDER__
//==============================================================================
+// IREE_MEMORY_ACCESS_*
+//==============================================================================
+// Certain architectures have specific memory access requirements that require
+// user-mode code changes to work at all or work at reasonable performance.
+
+#if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED)
+
+#if defined(IREE_ARCH_ARM_32) || defined(IREE_ARCH_ARM_64)
+
+// Armv6-M and Armv8-M (w/o the main extension) do not support unaligned access.
+// The -munaligned-access and -mno-unaligned-access flags control this.
+// https://www.keil.com/support/man/docs/armclang_ref/armclang_ref_sam1444138667173.htm
+#if !defined(__ARM_FEATURE_UNALIGNED)
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 1
+#else
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 0
+#endif // !__ARM_FEATURE_UNALIGNED
+
+#elif defined(IREE_ARCH_RISCV_32) || defined(IREE_ARCH_RISCV_64)
+
+// Though unaligned access is part of the base spec it is allowed to be
+// implemented with trap handlers. Bare-metal systems likely won't have these
+// handlers and even on systems that do (linux) we don't want to be trapping for
+// every load/store.
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 1
+
+#else
+
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 0
+
+#endif  // IREE_ARCH_*
+
+//==============================================================================
// IREE_COMPILER_*
//==============================================================================
diff --git a/iree/vm/bytecode_dispatch_util.h b/iree/vm/bytecode_dispatch_util.h
index 2e692be..596968e 100644
--- a/iree/vm/bytecode_dispatch_util.h
+++ b/iree/vm/bytecode_dispatch_util.h
@@ -153,34 +153,12 @@
// Bytecode data access macros for reading values of a given type from a byte
// offset within the current function.
-#if defined(IREE_ENDIANNESS_LITTLE)
-#define OP_I8(i) bytecode_data[pc + (i)]
-#define OP_I16(i) *((uint16_t*)&bytecode_data[pc + (i)])
-#define OP_I32(i) *((uint32_t*)&bytecode_data[pc + (i)])
-#define OP_I64(i) *((uint64_t*)&bytecode_data[pc + (i)])
-#define OP_F32(i) *((float*)&bytecode_data[pc + (i)])
-#define OP_F64(i) *((double*)&bytecode_data[pc + (i)])
-#else
-#define OP_I8(i) bytecode_data[pc + (i)]
-#define OP_I16(i) \
- ((uint16_t)bytecode_data[pc + 0 + (i)]) | \
- ((uint16_t)bytecode_data[pc + 1 + (i)] << 8)
-#define OP_I32(i) \
- ((uint32_t)bytecode_data[pc + 0 + (i)]) | \
- ((uint32_t)bytecode_data[pc + 1 + (i)] << 8) | \
- ((uint32_t)bytecode_data[pc + 2 + (i)] << 16) | \
- ((uint32_t)bytecode_data[pc + 3 + (i)] << 24)
-#define OP_I64(i) \
- ((uint64_t)bytecode_data[pc + 0 + (i)]) | \
- ((uint64_t)bytecode_data[pc + 1 + (i)] << 8) | \
- ((uint64_t)bytecode_data[pc + 2 + (i)] << 16) | \
- ((uint64_t)bytecode_data[pc + 3 + (i)] << 24) | \
- ((uint64_t)bytecode_data[pc + 4 + (i)] << 32) | \
- ((uint64_t)bytecode_data[pc + 5 + (i)] << 40) | \
- ((uint64_t)bytecode_data[pc + 6 + (i)] << 48) | \
- ((uint64_t)bytecode_data[pc + 7 + (i)] << 56)
-#error "TODO: OP_F32 and OP_F64 for big endian systems"
-#endif // IREE_ENDIANNESS_LITTLE
+#define OP_I8(i) iree_unaligned_load_le((uint8_t*)&bytecode_data[pc + (i)])
+#define OP_I16(i) iree_unaligned_load_le((uint16_t*)&bytecode_data[pc + (i)])
+#define OP_I32(i) iree_unaligned_load_le((uint32_t*)&bytecode_data[pc + (i)])
+#define OP_I64(i) iree_unaligned_load_le((uint64_t*)&bytecode_data[pc + (i)])
+#define OP_F32(i) iree_unaligned_load_le((float*)&bytecode_data[pc + (i)])
+#define OP_F64(i) iree_unaligned_load_le((double*)&bytecode_data[pc + (i)])
//===----------------------------------------------------------------------===//
// Utilities matching the tablegen op encoding scheme