Adding iree_allocator_malloc_aligned & co.
These allow any underlying iree_allocator_t to allocate memory at
arbitrary alignments. Using library APIs like posix_memalign can be more
efficient, but for the number of buffers we have live, 0-64 bytes of
wastage is fine for the portability gains. We can always redirect the
_aligned methods down to allocators directly if we want to reduce this
waste in the future.
diff --git a/iree/base/alignment.h b/iree/base/alignment.h
index 6f32ffe..1fd0356 100644
--- a/iree/base/alignment.h
+++ b/iree/base/alignment.h
@@ -10,6 +10,7 @@
#ifndef IREE_BASE_ALIGNMENT_H_
#define IREE_BASE_ALIGNMENT_H_
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@@ -50,6 +51,12 @@
return (value + (alignment - 1)) & ~(alignment - 1);
}
+// Returns true if |value| is a multiple of |alignment| (a power of two).
+static inline bool iree_host_size_has_alignment(iree_host_size_t value,
+                                                iree_host_size_t alignment) {
+  return (value & (alignment - 1)) == 0;
+}
+
// Aligns |value| up to the given power-of-two |alignment| if required.
// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
static inline iree_device_size_t iree_device_align(
@@ -57,6 +64,12 @@
return (value + (alignment - 1)) & ~(alignment - 1);
}
+// Returns true if |value| is a multiple of |alignment| (a power of two).
+static inline bool iree_device_size_has_alignment(
+    iree_device_size_t value, iree_device_size_t alignment) {
+  return (value & (alignment - 1)) == 0;
+}
+
// Returns the size of a struct padded out to iree_max_align_t.
// This must be used when performing manual trailing allocation packing to
// ensure the alignment requirements of the trailing data are satisfied.
diff --git a/iree/base/allocator.c b/iree/base/allocator.c
index f52482f..d409370 100644
--- a/iree/base/allocator.c
+++ b/iree/base/allocator.c
@@ -10,6 +10,10 @@
#include "iree/base/api.h"
#include "iree/base/tracing.h"
+//===----------------------------------------------------------------------===//
+// iree_allocator_t (std::allocator-like interface)
+//===----------------------------------------------------------------------===//
+
static iree_status_t iree_allocator_issue_alloc(
iree_allocator_t allocator, iree_allocator_command_t command,
iree_host_size_t byte_length, void** inout_ptr) {
@@ -65,7 +69,7 @@
IREE_ASSERT_ARGUMENT(params);
IREE_ASSERT_ARGUMENT(inout_ptr);
iree_host_size_t byte_length = params->byte_length;
- if (byte_length == 0) {
+ if (IREE_UNLIKELY(byte_length == 0)) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"allocations must be >0 bytes");
}
@@ -128,3 +132,130 @@
"unsupported system allocator command");
}
}
+
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Returns 1 if |alignment| is zero or a power of two and 0 otherwise.
+static inline iree_host_size_t iree_alignment_is_pot(
+    iree_host_size_t alignment) {
+  return !(alignment & (alignment - 1));
+}
+
+// Returns a pointer past |unaligned_ptr| whose byte at |offset| lands on an
+// |alignment| boundary; the result is > sizeof(void*) bytes beyond the base.
+static inline void* iree_aligned_ptr(void* unaligned_ptr,
+                                     iree_host_size_t alignment,
+                                     iree_host_size_t offset) {
+  uintptr_t address = (uintptr_t)unaligned_ptr + (alignment + sizeof(void*));
+  address = (address + offset) & ~(uintptr_t)(alignment - 1);
+  return (void*)(address - offset);
+}
+
+// Returns the unaligned base pointer stored just below |aligned_ptr|.
+static inline void* iree_aligned_ptr_get_base(void* aligned_ptr) {
+  const uintptr_t slot_mask = ~(uintptr_t)(sizeof(void*) - 1);
+  void** base_slot = (void**)((uintptr_t)aligned_ptr & slot_mask) - 1;
+  return *base_slot;
+}
+
+// Stores |base_ptr| in the pointer-sized slot just below |aligned_ptr|.
+static inline void iree_aligned_ptr_set_base(void* aligned_ptr,
+                                             void* base_ptr) {
+  const uintptr_t slot_mask = ~(uintptr_t)(sizeof(void*) - 1);
+  void** base_slot = (void**)((uintptr_t)aligned_ptr & slot_mask) - 1;
+  *base_slot = base_ptr;
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+    iree_allocator_t allocator, iree_host_size_t byte_length,
+    iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr) {
+  IREE_ASSERT_ARGUMENT(out_ptr);
+  if (IREE_UNLIKELY(byte_length == 0)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "allocations must be >0 bytes");
+  }
+  // Requests below iree_max_align_t are rounded up to it.
+  const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+  if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+  }
+
+  // [base ptr] [padding...] [aligned data] [padding...]
+  // The extra |alignment| bytes cover the worst-case padding and the extra
+  // pointer-sized slot stores the unaligned base for free/realloc.
+  const iree_host_size_t overhead = sizeof(uintptr_t) + alignment;
+  const iree_host_size_t total_length = overhead + byte_length;
+  if (IREE_UNLIKELY(total_length < byte_length)) {
+    // The sum wrapped around: allocating the wrapped size would return a
+    // buffer smaller than the |byte_length| the caller expects to use.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "aligned allocation size overflows");
+  }
+  void* unaligned_ptr = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, total_length, &unaligned_ptr));
+  void* aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+  // Remember the allocator's pointer just below the pointer we hand out.
+  iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+  *out_ptr = aligned_ptr;
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+    iree_allocator_t allocator, iree_host_size_t byte_length,
+    iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr) {
+  IREE_ASSERT_ARGUMENT(inout_ptr);
+  if (!*inout_ptr) {
+    // No existing allocation to preserve; this is just an aligned malloc.
+    return iree_allocator_malloc_aligned(allocator, byte_length, min_alignment,
+                                         offset, inout_ptr);
+  }
+  if (IREE_UNLIKELY(byte_length == 0)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "allocations must be >0 bytes");
+  }
+  // Must be derived exactly as in iree_allocator_malloc_aligned (rounding *up*
+  // to iree_max_align_t); otherwise the consistency check below rejects
+  // pointers that were allocated with this same |min_alignment|.
+  const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+  if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+  }
+  void* aligned_ptr = *inout_ptr;
+  void* unaligned_ptr = iree_aligned_ptr_get_base(aligned_ptr);
+  // Re-deriving the aligned pointer from the stored base must reproduce the
+  // pointer we were given; a mismatch means |min_alignment| or |offset| differ
+  // from the values used when the memory was allocated.
+  if (IREE_UNLIKELY(aligned_ptr !=
+                    iree_aligned_ptr(unaligned_ptr, alignment, offset))) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "reallocation must have the same alignment as the "
+                            "original allocation (got %" PRIhsz ")",
+                            min_alignment);
+  }
+
+  // Since the reallocated memory block may have a different unaligned base to
+  // aligned offset we may need to move the data. Capture the original offset
+  // into the unaligned base where the valid data resides.
+  uintptr_t old_offset = (uintptr_t)aligned_ptr - (uintptr_t)unaligned_ptr;
+
+  // [base ptr] [padding...] [aligned data] [padding...]
+  const iree_host_size_t overhead = sizeof(uintptr_t) + alignment;
+  const iree_host_size_t total_length = overhead + byte_length;
+  if (IREE_UNLIKELY(total_length < byte_length)) {
+    // The sum wrapped around: reallocating to the wrapped size would return a
+    // buffer smaller than the |byte_length| the caller expects to use.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "aligned allocation size overflows");
+  }
+  // On failure the local |unaligned_ptr| is discarded and |*inout_ptr| is left
+  // untouched.
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_realloc(allocator, total_length, &unaligned_ptr));
+  aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+  const uint8_t* old_data = (uint8_t*)unaligned_ptr + old_offset;
+  uint8_t* new_data = (uint8_t*)aligned_ptr;
+  if (old_data != new_data) {
+    // Alignment at offset changed; copy data to the new aligned offset.
+    // NOTE: this is copying up to the *new* byte length, as we don't store the
+    // old length and don't know how much to copy. Since we've already
+    // reallocated we know this will always be in-bounds, but it's inefficient.
+    // NOTE: memmove instead of memcpy as the regions may overlap.
+    memmove(new_data, old_data, byte_length);
+  }
+
+  // Written after the move so the data copy cannot clobber the stored base.
+  iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+  *inout_ptr = aligned_ptr;
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+                                                 void* ptr) {
+  // A NULL |ptr| has no stored base to look up; nothing to do.
+  if (!ptr) return;
+  // Hand the allocator back the pointer stored below |ptr|.
+  iree_allocator_free(allocator, iree_aligned_ptr_get_base(ptr));
+}
diff --git a/iree/base/allocator.h b/iree/base/allocator.h
index 477c75d..9ac26f4 100644
--- a/iree/base/allocator.h
+++ b/iree/base/allocator.h
@@ -242,6 +242,43 @@
return allocator.ctl == NULL;
}
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Allocates memory of size |byte_length| where the byte starting at |offset|
+// has a minimum alignment of |min_alignment|. In many cases |offset| can be 0.
+// |min_alignment| values below iree_max_align_t are rounded up to it and the
+// resulting alignment must be a power of two. |byte_length| must be non-zero.
+//
+// The |offset| can be used to ensure the alignment-sensitive portion of a
+// combined allocation is aligned while any prefix metadata precedes it.
+// For example:
+//   typedef struct {
+//     uint32_t some_metadata;
+//     uint8_t data[];
+//   } buffer_t;
+//   buffer_t* buffer = NULL;
+//   iree_allocator_malloc_aligned(allocator, sizeof(buffer_t) + length,
+//                                 4096, offsetof(buffer_t, data),
+//                                 (void**)&buffer);
+//   // `data` will be aligned on at least a 4096 boundary. `buffer` itself
+//   // sits |offset| bytes below that boundary, so |offset| should be a
+//   // multiple of the alignment the prefix struct requires.
+//
+// The contents of the returned memory are guaranteed to be zeroed.
+// The returned pointer must be released with iree_allocator_free_aligned.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr);
+
+// Reallocates memory to |byte_length|, growing or shrinking as needed.
+// Only valid on memory allocated with iree_allocator_malloc_aligned, and
+// |min_alignment| and |offset| must match the values used for that allocation.
+// A NULL |*inout_ptr| is treated as a new iree_allocator_malloc_aligned call.
+// The newly reallocated memory will have the byte at |offset| aligned to at
+// least |min_alignment|.
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr);
+
+// Frees a |ptr| previously returned from iree_allocator_malloc_aligned or
+// iree_allocator_realloc_aligned. A NULL |ptr| is ignored.
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+ void* ptr);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus