Merge pull request #8534 from google/benvanik-align-64
Align HAL heap buffers to a configuration-defined minimum alignment.
diff --git a/iree/base/alignment.h b/iree/base/alignment.h
index 6f32ffe..1fd0356 100644
--- a/iree/base/alignment.h
+++ b/iree/base/alignment.h
@@ -10,6 +10,7 @@
#ifndef IREE_BASE_ALIGNMENT_H_
#define IREE_BASE_ALIGNMENT_H_
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@@ -50,6 +51,12 @@
return (value + (alignment - 1)) & ~(alignment - 1);
}
+// Returns true if |value| already satisfies |alignment|, i.e. rounding it up
+// with iree_host_align leaves it unchanged.
+static inline bool iree_host_size_has_alignment(iree_host_size_t value,
+                                                iree_host_size_t alignment) {
+  const iree_host_size_t aligned_value = iree_host_align(value, alignment);
+  return aligned_value == value;
+}
+
// Aligns |value| up to the given power-of-two |alignment| if required.
// https://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
static inline iree_device_size_t iree_device_align(
@@ -57,6 +64,12 @@
return (value + (alignment - 1)) & ~(alignment - 1);
}
+// Returns true if |value| already satisfies |alignment|, i.e. rounding it up
+// with iree_device_align leaves it unchanged.
+static inline bool iree_device_size_has_alignment(
+    iree_device_size_t value, iree_device_size_t alignment) {
+  const iree_device_size_t aligned_value = iree_device_align(value, alignment);
+  return aligned_value == value;
+}
+
// Returns the size of a struct padded out to iree_max_align_t.
// This must be used when performing manual trailing allocation packing to
// ensure the alignment requirements of the trailing data are satisfied.
diff --git a/iree/base/allocator.c b/iree/base/allocator.c
index f52482f..d409370 100644
--- a/iree/base/allocator.c
+++ b/iree/base/allocator.c
@@ -10,6 +10,10 @@
#include "iree/base/api.h"
#include "iree/base/tracing.h"
+//===----------------------------------------------------------------------===//
+// iree_allocator_t (std::allocator-like interface)
+//===----------------------------------------------------------------------===//
+
static iree_status_t iree_allocator_issue_alloc(
iree_allocator_t allocator, iree_allocator_command_t command,
iree_host_size_t byte_length, void** inout_ptr) {
@@ -65,7 +69,7 @@
IREE_ASSERT_ARGUMENT(params);
IREE_ASSERT_ARGUMENT(inout_ptr);
iree_host_size_t byte_length = params->byte_length;
- if (byte_length == 0) {
+ if (IREE_UNLIKELY(byte_length == 0)) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"allocations must be >0 bytes");
}
@@ -128,3 +132,130 @@
"unsupported system allocator command");
}
}
+
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Returns true if |alignment| is a power of two (or 0).
+// Clearing the lowest set bit with `x & (x - 1)` leaves zero only when at most
+// one bit was set, which is why 0 is also accepted.
+static inline bool iree_alignment_is_pot(iree_host_size_t alignment) {
+  return (alignment & (alignment - 1)) == 0;
+}
+
+// Returns a pointer p inside the allocation starting at |unaligned_ptr| such
+// that (p + |offset|) is a multiple of the power-of-two |alignment|.
+// p lands more than sizeof(void*) and at most |alignment| + sizeof(void*)
+// bytes past |unaligned_ptr|, leaving room in front of it for the stashed
+// base pointer.
+static inline void* iree_aligned_ptr(void* unaligned_ptr,
+                                     iree_host_size_t alignment,
+                                     iree_host_size_t offset) {
+  const uintptr_t alignment_mask = ~(uintptr_t)(alignment - 1);
+  // Skip past the worst-case padding and the base pointer slot, then round the
+  // address of the byte at |offset| down to the alignment boundary.
+  uintptr_t address =
+      (uintptr_t)unaligned_ptr + (alignment + sizeof(void*)) + offset;
+  address &= alignment_mask;
+  return (void*)(address - offset);
+}
+
+// Loads the original unaligned allocation pointer stashed in the pointer-sized
+// slot preceding |aligned_ptr|. |aligned_ptr| is rounded down to pointer
+// alignment first as it is not required to be pointer-aligned itself.
+static inline void* iree_aligned_ptr_get_base(void* aligned_ptr) {
+  const uintptr_t slot_mask = ~(uintptr_t)(sizeof(void*) - 1);
+  void** base_slot = (void**)((uintptr_t)aligned_ptr & slot_mask);
+  return base_slot[-1];
+}
+
+// Stores |base_ptr| (the original unaligned allocation) in the pointer-sized
+// slot preceding |aligned_ptr| so that it can be recovered later.
+// |aligned_ptr| is rounded down to pointer alignment before indexing.
+static inline void iree_aligned_ptr_set_base(void* aligned_ptr,
+                                             void* base_ptr) {
+  const uintptr_t slot_mask = ~(uintptr_t)(sizeof(void*) - 1);
+  void** base_slot = (void**)((uintptr_t)aligned_ptr & slot_mask);
+  base_slot[-1] = base_ptr;
+}
+
+// Allocates |byte_length| bytes from |allocator| such that the byte at
+// |offset| within the returned pointer is aligned to at least |min_alignment|
+// (clamped up to iree_max_align_t).
+//
+// Returns INVALID_ARGUMENT if |byte_length| is 0, if the effective alignment
+// is not a power of two, or if the padded allocation size would overflow.
+// On success |out_ptr| receives the aligned pointer; it must be released with
+// iree_allocator_free_aligned as the underlying allocation starts earlier.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+    iree_allocator_t allocator, iree_host_size_t byte_length,
+    iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr) {
+  IREE_ASSERT_ARGUMENT(out_ptr);
+  if (IREE_UNLIKELY(byte_length == 0)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "allocations must be >0 bytes");
+  }
+  const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+  if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+  }
+
+  // [base ptr] [padding...] [aligned data] [padding...]
+  const iree_host_size_t total_length =
+      sizeof(uintptr_t) + byte_length + alignment;
+  // The unsigned sum can wrap at most once and a wrapped result is always
+  // smaller than |byte_length|. Without this check a wrapped size would yield
+  // an undersized block that the aligned pointer would then run past.
+  if (IREE_UNLIKELY(total_length < byte_length)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "aligned allocation size overflows (%" PRIhsz
+                            " bytes at alignment %" PRIhsz ")",
+                            byte_length, alignment);
+  }
+  void* unaligned_ptr = NULL;
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_malloc(allocator, total_length, &unaligned_ptr));
+  void* aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+  // Stash the real allocation pointer just before the aligned data so that
+  // realloc/free can recover it.
+  iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+  *out_ptr = aligned_ptr;
+  return iree_ok_status();
+}
+
+// Reallocates an aligned allocation to |byte_length| bytes, keeping the byte
+// at |offset| aligned to at least |min_alignment|.
+//
+// If |inout_ptr| points at NULL this behaves like
+// iree_allocator_malloc_aligned. Otherwise |min_alignment| and |offset| must
+// reproduce the placement of the existing allocation; INVALID_ARGUMENT is
+// returned if they do not, if |byte_length| is 0, if the effective alignment
+// is not a power of two, or if the padded size would overflow.
+// On failure |inout_ptr| is left unmodified.
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+    iree_allocator_t allocator, iree_host_size_t byte_length,
+    iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr) {
+  IREE_ASSERT_ARGUMENT(inout_ptr);
+  if (!*inout_ptr) {
+    // Nothing to preserve; treat as a fresh aligned allocation.
+    return iree_allocator_malloc_aligned(allocator, byte_length, min_alignment,
+                                         offset, inout_ptr);
+  }
+  if (IREE_UNLIKELY(byte_length == 0)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "allocations must be >0 bytes");
+  }
+  // NOTE: this must be derived exactly as in iree_allocator_malloc_aligned
+  // (clamped *up* to iree_max_align_t). Using a different effective alignment
+  // here would make the placement check below reject valid pointers and would
+  // under-size the padding reserved for the reallocated block.
+  const iree_host_size_t alignment = iree_max(min_alignment, iree_max_align_t);
+  if (IREE_UNLIKELY(!iree_alignment_is_pot(alignment))) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "alignments must be powers of two (got %" PRIhsz ")", min_alignment);
+  }
+  void* aligned_ptr = *inout_ptr;
+  void* unaligned_ptr = iree_aligned_ptr_get_base(aligned_ptr);
+  if (IREE_UNLIKELY(aligned_ptr !=
+                    iree_aligned_ptr(unaligned_ptr, alignment, offset))) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "reallocation must have the same alignment as the "
+                            "original allocation (got %" PRIhsz ")",
+                            min_alignment);
+  }
+
+  // Since the reallocated memory block may have a different unaligned base to
+  // aligned offset we may need to move the data. Capture the original offset
+  // into the unaligned base where the valid data resides.
+  uintptr_t old_offset = (uintptr_t)aligned_ptr - (uintptr_t)unaligned_ptr;
+
+  // [base ptr] [padding...] [aligned data] [padding...]
+  const iree_host_size_t total_length =
+      sizeof(uintptr_t) + byte_length + alignment;
+  // The unsigned sum can wrap at most once and a wrapped result is always
+  // smaller than |byte_length|; reject it before touching the allocation.
+  if (IREE_UNLIKELY(total_length < byte_length)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "aligned allocation size overflows (%" PRIhsz
+                            " bytes at alignment %" PRIhsz ")",
+                            byte_length, alignment);
+  }
+  IREE_RETURN_IF_ERROR(
+      iree_allocator_realloc(allocator, total_length, &unaligned_ptr));
+  aligned_ptr = iree_aligned_ptr(unaligned_ptr, alignment, offset);
+
+  const uint8_t* old_data = (uint8_t*)unaligned_ptr + old_offset;
+  uint8_t* new_data = (uint8_t*)aligned_ptr;
+  if (old_data != new_data) {
+    // Alignment at offset changed; copy data to the new aligned offset.
+    // NOTE: this is copying up to the *new* byte length, as we don't store the
+    // old length and don't know how much to copy. Since we've already
+    // reallocated we know this will always be in-bounds, but it's inefficient.
+    // NOTE: memmove instead of memcpy as the regions may overlap.
+    memmove(new_data, old_data, byte_length);
+  }
+
+  // Re-stash the (possibly moved) real allocation pointer ahead of the data.
+  iree_aligned_ptr_set_base(aligned_ptr, unaligned_ptr);
+  *inout_ptr = aligned_ptr;
+  return iree_ok_status();
+}
+
+// Releases memory obtained from the aligned allocation routines by recovering
+// the stashed unaligned base pointer and freeing that. NULL is ignored.
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+                                                 void* ptr) {
+  if (!ptr) return;
+  iree_allocator_free(allocator, iree_aligned_ptr_get_base(ptr));
+}
diff --git a/iree/base/allocator.h b/iree/base/allocator.h
index 6b71d54..9ac26f4 100644
--- a/iree/base/allocator.h
+++ b/iree/base/allocator.h
@@ -117,46 +117,6 @@
#endif // IREE_COMPILER_MSVC
//===----------------------------------------------------------------------===//
-// C11 aligned_alloc compatibility shim
-//===----------------------------------------------------------------------===//
-
-#if defined(IREE_PLATFORM_WINDOWS)
-// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc
-#define iree_aligned_alloc(alignment, size) _aligned_malloc(size, alignment)
-#define iree_aligned_free(p) _aligned_free(p)
-#elif defined(_ISOC11_SOURCE)
-// https://en.cppreference.com/w/c/memory/aligned_alloc
-#define iree_aligned_alloc(alignment, size) aligned_alloc(alignment, size)
-#define iree_aligned_free(p) free(p)
-#elif _POSIX_C_SOURCE >= 200112L
-// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
-static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
- void* ptr = NULL;
- return posix_memalign(&ptr, alignment, size) == 0 ? ptr : NULL;
-}
-#define iree_aligned_free(p) free(p)
-#else
-// Emulates alignment with normal malloc. We overallocate by at least the
-// alignment + the size of a pointer, store the base pointer at p[-1], and
-// return the aligned pointer. This lets us easily get the base pointer in free
-// to pass back to the system.
-static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
- void* base_ptr = malloc(size + alignment + sizeof(uintptr_t));
- if (!base_ptr) return NULL;
- uintptr_t* aligned_ptr = (uintptr_t*)iree_host_align(
- (uintptr_t)base_ptr + sizeof(uintptr_t), alignment);
- aligned_ptr[-1] = (uintptr_t)base_ptr;
- return aligned_ptr;
-}
-static inline void iree_aligned_free(void* p) {
- if (IREE_UNLIKELY(!p)) return;
- uintptr_t* aligned_ptr = (uintptr_t*)p;
- void* base_ptr = (void*)aligned_ptr[-1];
- free(base_ptr);
-}
-#endif // IREE_PLATFORM_WINDOWS
-
-//===----------------------------------------------------------------------===//
// iree_allocator_t (std::allocator-like interface)
//===----------------------------------------------------------------------===//
@@ -246,6 +206,7 @@
// If the reallocation fails then the original |inout_ptr| is unmodified.
//
// WARNING: when extending the newly allocated bytes are undefined.
+// TODO(benvanik): make them zeros; we should have an _uninitialized if needed.
IREE_API_EXPORT iree_status_t iree_allocator_realloc(
iree_allocator_t allocator, iree_host_size_t byte_length, void** inout_ptr);
@@ -281,6 +242,43 @@
return allocator.ctl == NULL;
}
+//===----------------------------------------------------------------------===//
+// Aligned allocations via iree_allocator_t
+//===----------------------------------------------------------------------===//
+
+// Allocates memory of size |byte_length| where the byte starting at |offset|
+// has a minimum alignment of |min_alignment|. In many cases |offset| can be 0.
+//
+// The |offset| can be used to ensure the alignment-sensitive portion of a
+// combined allocation is aligned while any prefix metadata has system
+// alignment. For example:
+// typedef struct {
+// uint32_t some_metadata;
+// uint8_t data[];
+// } buffer_t;
+// buffer_t* buffer = NULL;
+//   iree_allocator_malloc_aligned(allocator, sizeof(buffer_t) + length,
+//                                 4096, offsetof(buffer_t, data),
+//                                 (void**)&buffer);
+// // `buffer` has system alignment, but the `data` will be aligned on at
+// // least a 4096 boundary.
+//
+// The contents of the returned memory are guaranteed to be zeroed.
+IREE_API_EXPORT iree_status_t iree_allocator_malloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** out_ptr);
+
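+// Reallocates memory to |byte_length|, growing or shrinking as needed.
+// Only valid on memory allocated with iree_allocator_malloc_aligned, or on a
+// NULL |inout_ptr|, in which case this behaves like
+// iree_allocator_malloc_aligned.
+// The newly reallocated memory will have the byte at |offset| aligned to at
+// least |min_alignment|. Both |min_alignment| and |offset| must be the same
+// values used for the original allocation; otherwise the call fails with
+// INVALID_ARGUMENT.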
+IREE_API_EXPORT iree_status_t iree_allocator_realloc_aligned(
+ iree_allocator_t allocator, iree_host_size_t byte_length,
+ iree_host_size_t min_alignment, iree_host_size_t offset, void** inout_ptr);
+
+// Frees a |ptr| previously returned from iree_allocator_malloc_aligned.
+IREE_API_EXPORT void iree_allocator_free_aligned(iree_allocator_t allocator,
+ void* ptr);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/iree/base/config.h b/iree/base/config.h
index 022a522..e82c02d 100644
--- a/iree/base/config.h
+++ b/iree/base/config.h
@@ -155,6 +155,14 @@
// Enables optional HAL features. Each of these may add several KB to the final
// binary when linked dynamically.
+#if !defined(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+// Power of two byte alignment required on all host heap buffers.
+// Executables are compiled with alignment expectations and the runtime
+// alignment must be greater than or equal to the alignment set in the compiler.
+// External buffers wrapped by HAL buffers must meet this alignment requirement.
+#define IREE_HAL_HEAP_BUFFER_ALIGNMENT 64
+#endif // IREE_HAL_HEAP_BUFFER_ALIGNMENT
+
#if !defined(IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE)
// Enables additional validation of commands issued against command buffers.
// This adds small amounts of per-command overhead but in all but the most
diff --git a/iree/base/status.c b/iree/base/status.c
index 341630e..d71ba53 100644
--- a/iree/base/status.c
+++ b/iree/base/status.c
@@ -23,6 +23,46 @@
#include "iree/base/tracing.h"
//===----------------------------------------------------------------------===//
+// C11 aligned_alloc compatibility shim
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_PLATFORM_WINDOWS)
+// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc
+#define iree_aligned_alloc(alignment, size) _aligned_malloc(size, alignment)
+#define iree_aligned_free(p) _aligned_free(p)
+#elif defined(_ISOC11_SOURCE)
+// https://en.cppreference.com/w/c/memory/aligned_alloc
+// aligned_alloc requires |size| to be an integral multiple of |alignment|;
+// implementations may return NULL (or, per C11 as published, have undefined
+// behavior) otherwise. Round the request up so any size is accepted.
+static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
+  return aligned_alloc(alignment, iree_host_align(size, alignment));
+}
+#define iree_aligned_free(p) free(p)
+#elif _POSIX_C_SOURCE >= 200112L
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
+static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
+  // posix_memalign reports failure through its return code, not the pointer.
+  void* ptr = NULL;
+  if (posix_memalign(&ptr, alignment, size) != 0) return NULL;
+  return ptr;
+}
+#define iree_aligned_free(p) free(p)
+#else
+// Emulates alignment with normal malloc. We overallocate by at least the
+// alignment + the size of a pointer, store the base pointer at p[-1], and
+// return the aligned pointer. This lets us easily get the base pointer in free
+// to pass back to the system.
+static inline void* iree_aligned_alloc(size_t alignment, size_t size) {
+  const size_t padded_size = size + alignment + sizeof(uintptr_t);
+  void* raw_block = malloc(padded_size);
+  if (!raw_block) return NULL;
+  // Reserve one slot for the stashed base pointer, then round up to
+  // |alignment| so the slot always sits directly before the returned pointer.
+  const uintptr_t first_usable = (uintptr_t)raw_block + sizeof(uintptr_t);
+  uintptr_t* user_ptr = (uintptr_t*)iree_host_align(first_usable, alignment);
+  user_ptr[-1] = (uintptr_t)raw_block;
+  return user_ptr;
+}
+static inline void iree_aligned_free(void* p) {
+  if (IREE_UNLIKELY(!p)) return;
+  // The slot directly before |p| holds the pointer malloc originally returned.
+  const uintptr_t* user_ptr = (const uintptr_t*)p;
+  free((void*)user_ptr[-1]);
+}
+#endif // IREE_PLATFORM_WINDOWS
+
+//===----------------------------------------------------------------------===//
// iree_status_t canonical errors
//===----------------------------------------------------------------------===//
diff --git a/iree/hal/buffer.h b/iree/hal/buffer.h
index f41e2a5..fb23ce0 100644
--- a/iree/hal/buffer.h
+++ b/iree/hal/buffer.h
@@ -517,22 +517,6 @@
iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
//===----------------------------------------------------------------------===//
-// iree_hal_heap_buffer_t
-//===----------------------------------------------------------------------===//
-
-// Wraps an existing host allocation in a buffer.
-// When the buffer is destroyed the provided |data_allocator| will be used to
-// free |data|. Pass iree_allocator_null() to wrap without ownership semantics.
-//
-// |out_buffer| must be released by the caller.
-IREE_API_EXPORT iree_status_t iree_hal_heap_buffer_wrap(
- iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
- iree_hal_memory_access_t allowed_access,
- iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
- iree_byte_span_t data, iree_allocator_t data_allocator,
- iree_hal_buffer_t** out_buffer);
-
-//===----------------------------------------------------------------------===//
// iree_hal_buffer_t implementation details
//===----------------------------------------------------------------------===//
diff --git a/iree/hal/buffer_heap.c b/iree/hal/buffer_heap.c
index 7d3ec2f..e2c3ee8 100644
--- a/iree/hal/buffer_heap.c
+++ b/iree/hal/buffer_heap.c
@@ -27,6 +27,31 @@
static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable;
+// Flags packed into the low bits of the stored heap buffer data pointer.
+enum {
+  // Data storage must be released with iree_allocator_free_aligned.
+  IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED = 1u << 0,
+  // Buffer metadata must be released with iree_allocator_free_aligned.
+  IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED = 1u << 1,
+  // All bits that must be stripped to recover the real data pointer.
+  IREE_HAL_HEAP_BUFFER_FLAG_MASK = (IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED |
+                                    IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED),
+};
+
+// Returns the buffer's data pointer with the low flag bits masked off.
+static inline uint8_t* iree_hal_heap_buffer_ptr(
+    const iree_hal_heap_buffer_t* buffer) {
+  const uintptr_t tagged_ptr = (uintptr_t)buffer->data.data;
+  return (uint8_t*)(tagged_ptr & ~IREE_HAL_HEAP_BUFFER_FLAG_MASK);
+}
+
+// Returns true if the DATA_IS_ALIGNED flag bit is set on the buffer's stored
+// data pointer.
+static inline bool iree_hal_heap_buffer_data_is_aligned(
+    const iree_hal_heap_buffer_t* buffer) {
+  const uintptr_t tagged_ptr = (uintptr_t)buffer->data.data;
+  return iree_any_bit_set(tagged_ptr, IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED);
+}
+
+// Returns true if the METADATA_IS_ALIGNED flag bit is set on the buffer's
+// stored data pointer.
+static inline bool iree_hal_heap_buffer_metadata_is_aligned(
+    const iree_hal_heap_buffer_t* buffer) {
+  const uintptr_t tagged_ptr = (uintptr_t)buffer->data.data;
+  return iree_any_bit_set(tagged_ptr,
+                          IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED);
+}
+
// Allocates a buffer with the metadata and storage split.
// This results in an additional host allocation but allows for user-overridden
// data storage allocations.
@@ -35,16 +60,23 @@
iree_allocator_t host_allocator, iree_hal_heap_buffer_t** out_buffer,
iree_byte_span_t* out_data) {
// Try allocating the storage first as it's the most likely to fail if OOM.
+ // It must be aligned to the minimum buffer alignment.
out_data->data_length = allocation_size;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(data_allocator, allocation_size,
- (void**)&out_data->data));
+ uintptr_t data_ptr = 0;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+ data_allocator, allocation_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT,
+ /*offset=*/0, (void**)&data_ptr));
+ IREE_ASSERT_TRUE(
+ iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+ data_ptr |= IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED;
+ out_data->data = (uint8_t*)data_ptr;
- // Allocate the host metadata wrapper.
+ // Allocate the host metadata wrapper with natural alignment.
iree_status_t status = iree_allocator_malloc(
host_allocator, sizeof(**out_buffer), (void**)out_buffer);
if (!iree_status_is_ok(status)) {
// Need to free the storage we just allocated.
- iree_allocator_free(data_allocator, out_data->data);
+ iree_allocator_free_aligned(data_allocator, out_data->data);
}
return status;
}
@@ -55,16 +87,29 @@
static iree_status_t iree_hal_heap_buffer_allocate_slab(
iree_device_size_t allocation_size, iree_allocator_t host_allocator,
iree_hal_heap_buffer_t** out_buffer, iree_byte_span_t* out_data) {
- // NOTE: we want the buffer data to always be 16-byte aligned.
+ // The metadata header is always aligned and we want to ensure it's padded
+ // out to the max alignment.
iree_hal_heap_buffer_t* buffer = NULL;
iree_host_size_t header_size =
- iree_host_align(iree_sizeof_struct(*buffer), 16);
+ iree_host_align(iree_sizeof_struct(*buffer), iree_max_align_t);
iree_host_size_t total_size = header_size + allocation_size;
- IREE_RETURN_IF_ERROR(
- iree_allocator_malloc(host_allocator, total_size, (void**)&buffer));
+
+ // Allocate with the data starting at offset header_size aligned to the
+ // minimum required buffer alignment. The header itself will still be aligned
+ // to the natural alignment but our buffer alignment is often much larger.
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+ host_allocator, total_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT, header_size,
+ (void**)&buffer));
*out_buffer = buffer;
- *out_data =
- iree_make_byte_span((uint8_t*)buffer + header_size, allocation_size);
+
+ // Set bit indicating that we need to free the metadata with
+ // iree_allocator_free_aligned.
+ uintptr_t data_ptr = (uintptr_t)buffer + header_size;
+ IREE_ASSERT_TRUE(
+ iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+ data_ptr |= IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED;
+ *out_data = iree_make_byte_span((uint8_t*)data_ptr, allocation_size);
+
return iree_ok_status();
}
@@ -82,7 +127,7 @@
// If the data and host allocators are the same we can allocate more
// efficiently as a large slab. Otherwise we need to allocate both the
// metadata and the storage independently.
- bool same_allocator =
+ const bool same_allocator =
memcmp(&data_allocator, &host_allocator, sizeof(data_allocator)) == 0;
iree_hal_heap_buffer_t* buffer = NULL;
@@ -120,16 +165,27 @@
return status;
}
-IREE_API_EXPORT iree_status_t iree_hal_heap_buffer_wrap(
- iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
- iree_hal_memory_access_t allowed_access,
- iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
- iree_byte_span_t data, iree_allocator_t data_allocator,
- iree_hal_buffer_t** out_buffer) {
+iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
+ iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage,
+ iree_device_size_t allocation_size,
+ iree_byte_span_t data,
+ iree_allocator_t data_allocator,
+ iree_hal_buffer_t** out_buffer) {
IREE_ASSERT_ARGUMENT(allocator);
IREE_ASSERT_ARGUMENT(out_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
+ uintptr_t data_ptr = (uintptr_t)data.data & ~IREE_HAL_HEAP_BUFFER_FLAG_MASK;
+ if (!iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "imported heap buffer data must be aligned to %d; got %p",
+ (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, (void*)data_ptr);
+ }
+
iree_allocator_t host_allocator =
iree_hal_allocator_host_allocator(allocator);
iree_hal_heap_buffer_t* buffer = NULL;
@@ -164,8 +220,16 @@
}
});
- iree_allocator_free(buffer->data_allocator, buffer->data.data);
- iree_allocator_free(host_allocator, buffer);
+ if (iree_hal_heap_buffer_data_is_aligned(buffer)) {
+ iree_allocator_free_aligned(buffer->data_allocator, buffer->data.data);
+ } else {
+ iree_allocator_free(buffer->data_allocator, buffer->data.data);
+ }
+ if (iree_hal_heap_buffer_metadata_is_aligned(buffer)) {
+ iree_allocator_free_aligned(host_allocator, buffer);
+ } else {
+ iree_allocator_free(host_allocator, buffer);
+ }
IREE_TRACE_ZONE_END(z0);
}
@@ -176,8 +240,8 @@
iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
- mapping->contents = iree_make_byte_span(buffer->data.data + local_byte_offset,
- local_byte_length);
+ mapping->contents = iree_make_byte_span(
+ iree_hal_heap_buffer_ptr(buffer) + local_byte_offset, local_byte_length);
// If we mapped for discard scribble over the bytes. This is not a mandated
// behavior but it will make debugging issues easier. Alternatively for
diff --git a/iree/hal/buffer_heap_impl.h b/iree/hal/buffer_heap_impl.h
index 5a2c5ac..5ade6e6 100644
--- a/iree/hal/buffer_heap_impl.h
+++ b/iree/hal/buffer_heap_impl.h
@@ -38,6 +38,26 @@
iree_allocator_t data_allocator, iree_allocator_t host_allocator,
iree_hal_buffer_t** out_buffer);
+// Wraps an existing host allocation in a buffer.
+// When the buffer is destroyed the provided |data_allocator| will be used to
+// free |data| using iree_allocator_free. Pass iree_allocator_null() to wrap
+// without ownership semantics.
+//
+// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT.
+// Note that it will be freed as a normal unaligned allocation. If we find
+// ourselves wanting to wrap aligned allocations requiring
+// iree_allocator_free_aligned then we'll need a flag to indicate that.
+//
+// |out_buffer| must be released by the caller.
+iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
+ iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage,
+ iree_device_size_t allocation_size,
+ iree_byte_span_t data,
+ iree_allocator_t data_allocator,
+ iree_hal_buffer_t** out_buffer);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/iree/hal/cts/buffer_mapping_test.h b/iree/hal/cts/buffer_mapping_test.h
index 652d010..0019c67 100644
--- a/iree/hal/cts/buffer_mapping_test.h
+++ b/iree/hal/cts/buffer_mapping_test.h
@@ -541,7 +541,6 @@
}
// TODO(scotttodd): iree_hal_allocator_wrap_buffer
-// TODO(scotttodd): iree_hal_heap_buffer_wrap
} // namespace cts
} // namespace hal
diff --git a/iree/runtime/demo/hello_world_explained.c b/iree/runtime/demo/hello_world_explained.c
index 886aec1..2a981aa 100644
--- a/iree/runtime/demo/hello_world_explained.c
+++ b/iree/runtime/demo/hello_world_explained.c
@@ -191,7 +191,8 @@
iree_hal_buffer_view_t* arg0 = NULL;
if (iree_status_is_ok(status)) {
static const iree_hal_dim_t arg0_shape[1] = {4};
- static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ static const float iree_alignas(64)
+ arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
status = iree_hal_buffer_view_wrap_or_clone_heap_buffer(
device_allocator,
// Shape dimensions and rank:
@@ -230,7 +231,8 @@
iree_hal_buffer_view_t* arg1 = NULL;
if (iree_status_is_ok(status)) {
static const iree_hal_dim_t arg1_shape[1] = {4};
- static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ static const float iree_alignas(64)
+ arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
status = iree_hal_buffer_view_wrap_or_clone_heap_buffer(
device_allocator, arg1_shape, IREE_ARRAYSIZE(arg1_shape),
IREE_HAL_ELEMENT_TYPE_FLOAT_32,
diff --git a/iree/runtime/demo/hello_world_terse.c b/iree/runtime/demo/hello_world_terse.c
index 2f715bc..121a2e5 100644
--- a/iree/runtime/demo/hello_world_terse.c
+++ b/iree/runtime/demo/hello_world_terse.c
@@ -78,7 +78,8 @@
// %arg0: tensor<4xf32>
iree_hal_buffer_view_t* arg0 = NULL;
static const iree_hal_dim_t arg0_shape[1] = {4};
- static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+ static const float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+ arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
IREE_CHECK_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
iree_runtime_session_device_allocator(session), arg0_shape,
IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
@@ -102,7 +103,8 @@
// %arg1: tensor<4xf32>
iree_hal_buffer_view_t* arg1 = NULL;
static const iree_hal_dim_t arg1_shape[1] = {4};
- static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+ static const float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+ arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
IREE_CHECK_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
iree_runtime_session_device_allocator(session), arg1_shape,
IREE_ARRAYSIZE(arg1_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
diff --git a/iree/samples/custom_modules/custom_modules_test.cc b/iree/samples/custom_modules/custom_modules_test.cc
index cf4e306..df2f190 100644
--- a/iree/samples/custom_modules/custom_modules_test.cc
+++ b/iree/samples/custom_modules/custom_modules_test.cc
@@ -131,8 +131,8 @@
TEST_F(CustomModulesTest, PrintTensor) {
// Allocate the buffer we'll be printing.
static iree_hal_dim_t kShape[] = {2, 4};
- static float kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f,
- 4.0f, 5.0f, 6.0f, 7.0f};
+ static float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+ kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
iree_hal_buffer_params_t params = {0};
params.type =
IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
@@ -179,8 +179,8 @@
TEST_F(CustomModulesTest, RoundTripTensor) {
// Allocate the buffer we'll be printing/parsing.
static iree_hal_dim_t kShape[] = {2, 4};
- static float kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f,
- 4.0f, 5.0f, 6.0f, 7.0f};
+ static float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+ kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
iree_hal_buffer_params_t params = {0};
params.type =
IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;