Making HAL heap buffers align to IREE_HAL_HEAP_BUFFER_ALIGNMENT.
This lets the runtime and compiler agree on a minimum alignment.
diff --git a/iree/base/config.h b/iree/base/config.h
index 022a522..e82c02d 100644
--- a/iree/base/config.h
+++ b/iree/base/config.h
@@ -155,6 +155,14 @@
// Enables optional HAL features. Each of these may add several KB to the final
// binary when linked dynamically.
+#if !defined(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
+// Power of two byte alignment required on all host heap buffers.
+// Executables are compiled with alignment expectations and the runtime
+// alignment must be greater than or equal to the alignment set in the compiler.
+// External buffers wrapped by HAL buffers must meet this alignment requirement.
+#define IREE_HAL_HEAP_BUFFER_ALIGNMENT 64
+#endif // IREE_HAL_HEAP_BUFFER_ALIGNMENT
+
#if !defined(IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE)
// Enables additional validation of commands issued against command buffers.
// This adds small amounts of per-command overhead but in all but the most
diff --git a/iree/hal/buffer.h b/iree/hal/buffer.h
index f41e2a5..fb23ce0 100644
--- a/iree/hal/buffer.h
+++ b/iree/hal/buffer.h
@@ -517,22 +517,6 @@
iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
//===----------------------------------------------------------------------===//
-// iree_hal_heap_buffer_t
-//===----------------------------------------------------------------------===//
-
-// Wraps an existing host allocation in a buffer.
-// When the buffer is destroyed the provided |data_allocator| will be used to
-// free |data|. Pass iree_allocator_null() to wrap without ownership semantics.
-//
-// |out_buffer| must be released by the caller.
-IREE_API_EXPORT iree_status_t iree_hal_heap_buffer_wrap(
- iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
- iree_hal_memory_access_t allowed_access,
- iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
- iree_byte_span_t data, iree_allocator_t data_allocator,
- iree_hal_buffer_t** out_buffer);
-
-//===----------------------------------------------------------------------===//
// iree_hal_buffer_t implementation details
//===----------------------------------------------------------------------===//
diff --git a/iree/hal/buffer_heap.c b/iree/hal/buffer_heap.c
index 7d3ec2f..e2c3ee8 100644
--- a/iree/hal/buffer_heap.c
+++ b/iree/hal/buffer_heap.c
@@ -27,6 +27,31 @@
static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable;
+enum {
+ IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED = 1u << 0,
+ IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED = 1u << 1,
+ IREE_HAL_HEAP_BUFFER_FLAG_MASK = IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED |
+ IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED,
+};
+
+static inline uint8_t* iree_hal_heap_buffer_ptr(
+ const iree_hal_heap_buffer_t* buffer) {
+ return (uint8_t*)((uintptr_t)buffer->data.data &
+ ~IREE_HAL_HEAP_BUFFER_FLAG_MASK);
+}
+
+static inline bool iree_hal_heap_buffer_data_is_aligned(
+ const iree_hal_heap_buffer_t* buffer) {
+ return iree_any_bit_set((uintptr_t)buffer->data.data,
+ IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED);
+}
+
+static inline bool iree_hal_heap_buffer_metadata_is_aligned(
+ const iree_hal_heap_buffer_t* buffer) {
+ return iree_any_bit_set((uintptr_t)buffer->data.data,
+ IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED);
+}
+
// Allocates a buffer with the metadata and storage split.
// This results in an additional host allocation but allows for user-overridden
// data storage allocations.
@@ -35,16 +60,23 @@
iree_allocator_t host_allocator, iree_hal_heap_buffer_t** out_buffer,
iree_byte_span_t* out_data) {
// Try allocating the storage first as it's the most likely to fail if OOM.
+ // It must be aligned to the minimum buffer alignment.
out_data->data_length = allocation_size;
- IREE_RETURN_IF_ERROR(iree_allocator_malloc(data_allocator, allocation_size,
- (void**)&out_data->data));
+ uintptr_t data_ptr = 0;
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+ data_allocator, allocation_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT,
+ /*offset=*/0, (void**)&data_ptr));
+ IREE_ASSERT_TRUE(
+ iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+ data_ptr |= IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED;
+ out_data->data = (uint8_t*)data_ptr;
- // Allocate the host metadata wrapper.
+ // Allocate the host metadata wrapper with natural alignment.
iree_status_t status = iree_allocator_malloc(
host_allocator, sizeof(**out_buffer), (void**)out_buffer);
if (!iree_status_is_ok(status)) {
// Need to free the storage we just allocated.
- iree_allocator_free(data_allocator, out_data->data);
+ iree_allocator_free_aligned(data_allocator, out_data->data);
}
return status;
}
@@ -55,16 +87,29 @@
static iree_status_t iree_hal_heap_buffer_allocate_slab(
iree_device_size_t allocation_size, iree_allocator_t host_allocator,
iree_hal_heap_buffer_t** out_buffer, iree_byte_span_t* out_data) {
- // NOTE: we want the buffer data to always be 16-byte aligned.
+ // The metadata header is always aligned and we want to ensure it's padded
+ // out to the max alignment.
iree_hal_heap_buffer_t* buffer = NULL;
iree_host_size_t header_size =
- iree_host_align(iree_sizeof_struct(*buffer), 16);
+ iree_host_align(iree_sizeof_struct(*buffer), iree_max_align_t);
iree_host_size_t total_size = header_size + allocation_size;
- IREE_RETURN_IF_ERROR(
- iree_allocator_malloc(host_allocator, total_size, (void**)&buffer));
+
+ // Allocate with the data starting at offset header_size aligned to the
+ // minimum required buffer alignment. The header itself will still be aligned
+ // to the natural alignment but our buffer alignment is often much larger.
+ IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
+ host_allocator, total_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT, header_size,
+ (void**)&buffer));
*out_buffer = buffer;
- *out_data =
- iree_make_byte_span((uint8_t*)buffer + header_size, allocation_size);
+
+ // Set bit indicating that we need to free the metadata with
+ // iree_allocator_free_aligned.
+ uintptr_t data_ptr = (uintptr_t)buffer + header_size;
+ IREE_ASSERT_TRUE(
+ iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+ data_ptr |= IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED;
+ *out_data = iree_make_byte_span((uint8_t*)data_ptr, allocation_size);
+
return iree_ok_status();
}
@@ -82,7 +127,7 @@
// If the data and host allocators are the same we can allocate more
// efficiently as a large slab. Otherwise we need to allocate both the
// metadata and the storage independently.
- bool same_allocator =
+ const bool same_allocator =
memcmp(&data_allocator, &host_allocator, sizeof(data_allocator)) == 0;
iree_hal_heap_buffer_t* buffer = NULL;
@@ -120,16 +165,27 @@
return status;
}
-IREE_API_EXPORT iree_status_t iree_hal_heap_buffer_wrap(
- iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
- iree_hal_memory_access_t allowed_access,
- iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
- iree_byte_span_t data, iree_allocator_t data_allocator,
- iree_hal_buffer_t** out_buffer) {
+iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
+ iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage,
+ iree_device_size_t allocation_size,
+ iree_byte_span_t data,
+ iree_allocator_t data_allocator,
+ iree_hal_buffer_t** out_buffer) {
IREE_ASSERT_ARGUMENT(allocator);
IREE_ASSERT_ARGUMENT(out_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
+ uintptr_t data_ptr = (uintptr_t)data.data & ~IREE_HAL_HEAP_BUFFER_FLAG_MASK;
+ if (!iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+ IREE_TRACE_ZONE_END(z0);
+ return iree_make_status(
+ IREE_STATUS_INVALID_ARGUMENT,
+ "imported heap buffer data must be aligned to %d; got %p",
+ (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, (void*)data_ptr);
+ }
+
iree_allocator_t host_allocator =
iree_hal_allocator_host_allocator(allocator);
iree_hal_heap_buffer_t* buffer = NULL;
@@ -164,8 +220,16 @@
}
});
- iree_allocator_free(buffer->data_allocator, buffer->data.data);
- iree_allocator_free(host_allocator, buffer);
+ if (iree_hal_heap_buffer_data_is_aligned(buffer)) {
+ iree_allocator_free_aligned(buffer->data_allocator, buffer->data.data);
+ } else {
+ iree_allocator_free(buffer->data_allocator, buffer->data.data);
+ }
+ if (iree_hal_heap_buffer_metadata_is_aligned(buffer)) {
+ iree_allocator_free_aligned(host_allocator, buffer);
+ } else {
+ iree_allocator_free(host_allocator, buffer);
+ }
IREE_TRACE_ZONE_END(z0);
}
@@ -176,8 +240,8 @@
iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
iree_hal_buffer_mapping_t* mapping) {
iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
- mapping->contents = iree_make_byte_span(buffer->data.data + local_byte_offset,
- local_byte_length);
+ mapping->contents = iree_make_byte_span(
+ iree_hal_heap_buffer_ptr(buffer) + local_byte_offset, local_byte_length);
// If we mapped for discard scribble over the bytes. This is not a mandated
// behavior but it will make debugging issues easier. Alternatively for
diff --git a/iree/hal/buffer_heap_impl.h b/iree/hal/buffer_heap_impl.h
index 5a2c5ac..5ade6e6 100644
--- a/iree/hal/buffer_heap_impl.h
+++ b/iree/hal/buffer_heap_impl.h
@@ -38,6 +38,26 @@
iree_allocator_t data_allocator, iree_allocator_t host_allocator,
iree_hal_buffer_t** out_buffer);
+// Wraps an existing host allocation in a buffer.
+// When the buffer is destroyed the provided |data_allocator| will be used to
+// free |data| using iree_allocator_free. Pass iree_allocator_null() to wrap
+// without ownership semantics.
+//
+// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT.
+// Note that it will be freed as a normal unaligned allocation. If we find
+// ourselves wanting to wrap aligned allocations requiring
+// iree_allocator_free_aligned then we'll need a flag to indicate that.
+//
+// |out_buffer| must be released by the caller.
+iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
+ iree_hal_memory_type_t memory_type,
+ iree_hal_memory_access_t allowed_access,
+ iree_hal_buffer_usage_t allowed_usage,
+ iree_device_size_t allocation_size,
+ iree_byte_span_t data,
+ iree_allocator_t data_allocator,
+ iree_hal_buffer_t** out_buffer);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/iree/hal/cts/buffer_mapping_test.h b/iree/hal/cts/buffer_mapping_test.h
index 652d010..0019c67 100644
--- a/iree/hal/cts/buffer_mapping_test.h
+++ b/iree/hal/cts/buffer_mapping_test.h
@@ -541,7 +541,6 @@
}
// TODO(scotttodd): iree_hal_allocator_wrap_buffer
-// TODO(scotttodd): iree_hal_heap_buffer_wrap
} // namespace cts
} // namespace hal