| // Copyright 2023 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include "experimental/metal/direct_allocator.h" |
| |
| #import <Metal/Metal.h> |
| |
| #include "experimental/metal/metal_buffer.h" |
| #include "iree/base/api.h" |
| #include "iree/base/target_platform.h" |
| #include "iree/base/tracing.h" |
| #include "iree/hal/api.h" |
| |
| #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING |
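// Identifier used to attribute Metal buffer allocations in tracing tools.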
| static const char* IREE_HAL_METAL_ALLOCATOR_ID = "METAL"; |
#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING
| |
| typedef struct iree_hal_metal_allocator_t { |
| // Abstract resource used for injecting reference counting and vtable; must be at offset 0. |
| iree_hal_resource_t resource; |
| |
| // The device that this allocator is attached to. |
| id<MTLDevice> device; |
| // The command queue that we can use to issue commands to make buffer contents visible to CPU. |
| #if defined(IREE_PLATFORM_MACOS) |
| id<MTLCommandQueue> queue; |
| #endif // IREE_PLATFORM_MACOS |
| |
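  // Whether the device has a unified memory architecture where the CPU and GPU share the same
  // physical memory (queried via MTLDevice's hasUnifiedMemory).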
| bool is_unified_memory; |
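  // The hazard tracking mode to request for buffers created by this allocator.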
| iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode; |
| |
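  // The host allocator used for allocating and freeing this allocator structure.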
| iree_allocator_t host_allocator; |
| |
| IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;) |
| } iree_hal_metal_allocator_t; |
| |
| static const iree_hal_allocator_vtable_t iree_hal_metal_allocator_vtable; |
| |
| static iree_hal_metal_allocator_t* iree_hal_metal_allocator_cast(iree_hal_allocator_t* base_value) { |
| IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_metal_allocator_vtable); |
| return (iree_hal_metal_allocator_t*)base_value; |
| } |
| |
| static const iree_hal_metal_allocator_t* iree_hal_metal_allocator_const_cast( |
| const iree_hal_allocator_t* base_value) { |
| IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_metal_allocator_vtable); |
| return (const iree_hal_metal_allocator_t*)base_value; |
| } |
| |
| iree_status_t iree_hal_metal_allocator_create( |
| id<MTLDevice> device, |
| #if defined(IREE_PLATFORM_MACOS) |
| id<MTLCommandQueue> queue, |
| #endif // IREE_PLATFORM_MACOS |
| iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode, |
| iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator) { |
| IREE_ASSERT_ARGUMENT(out_allocator); |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| iree_hal_metal_allocator_t* allocator = NULL; |
| iree_status_t status = |
| iree_allocator_malloc(host_allocator, sizeof(*allocator), (void**)&allocator); |
| |
  if (iree_status_is_ok(status)) {
    iree_hal_resource_initialize(&iree_hal_metal_allocator_vtable, &allocator->resource);
    allocator->device = [device retain]; // +1
#if defined(IREE_PLATFORM_MACOS)
    allocator->queue = [queue retain]; // +1
#endif // IREE_PLATFORM_MACOS
    allocator->is_unified_memory = [device hasUnifiedMemory];
    allocator->resource_tracking_mode = resource_tracking_mode;
    allocator->host_allocator = host_allocator;

    *out_allocator = (iree_hal_allocator_t*)allocator;
  }
| |
| IREE_TRACE_ZONE_END(z0); |
| return status; |
| } |
| |
| static void iree_hal_metal_allocator_destroy(iree_hal_allocator_t* IREE_RESTRICT base_allocator) { |
| iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator); |
| iree_allocator_t host_allocator = allocator->host_allocator; |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
#if defined(IREE_PLATFORM_MACOS)
  [allocator->queue release]; // -1
#endif // IREE_PLATFORM_MACOS
  [allocator->device release]; // -1
| iree_allocator_free(host_allocator, allocator); |
| |
| IREE_TRACE_ZONE_END(z0); |
| } |
| |
| static iree_allocator_t iree_hal_metal_allocator_host_allocator( |
| const iree_hal_allocator_t* IREE_RESTRICT base_allocator) { |
| iree_hal_metal_allocator_t* allocator = (iree_hal_metal_allocator_t*)base_allocator; |
| return allocator->host_allocator; |
| } |
| |
| #if defined(IREE_PLATFORM_MACOS) |
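// Returns the command queue this allocator uses to issue commands that make managed buffer
// contents visible to the CPU (macOS only).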
| id<MTLCommandQueue> iree_hal_metal_allocator_command_queue( |
| const iree_hal_allocator_t* base_allocator) { |
  const iree_hal_metal_allocator_t* allocator =
      iree_hal_metal_allocator_const_cast(base_allocator);
| return allocator->queue; |
| } |
| #endif // IREE_PLATFORM_MACOS |
| |
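// This allocator does not pool or cache buffers, so there is nothing to trim.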
| static iree_status_t iree_hal_metal_allocator_trim( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator) { |
| return iree_ok_status(); |
| } |
| |
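// Copies out the current allocation statistics when statistics tracking is compiled in;
// otherwise this is a no-op.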
| static void iree_hal_metal_allocator_query_statistics( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, |
| iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) { |
| IREE_STATISTICS({ |
| iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator); |
| memcpy(out_statistics, &allocator->statistics, sizeof(*out_statistics)); |
| }); |
| } |
| |
| static iree_hal_buffer_compatibility_t iree_hal_metal_allocator_query_buffer_compatibility( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, |
| iree_hal_buffer_params_t* IREE_RESTRICT params, |
| iree_device_size_t* IREE_RESTRICT allocation_size) { |
| // All buffers can be allocated on the heap. |
| iree_hal_buffer_compatibility_t compatibility = IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE; |
| |
| // Buffers can only be used on the queue if they are device visible. |
| if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) { |
| if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) { |
| compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER; |
| } |
| if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) { |
| compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH; |
| } |
| } |
| |
| if (iree_all_bits_set(params->type, |
| IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) { |
    // On iOS there is no device-local + host-visible memory, but given the unified memory
    // architecture it is fine to request host-local + device-visible memory instead. macOS
    // devices with unified memory behave the same way; otherwise (discrete GPUs) device-local +
    // host-visible memory can be backed by the Managed storage mode.
| iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator); |
| if (allocator->is_unified_memory) { |
| params->type &= ~(IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE); |
| params->type |= IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE; |
      // We are suggesting a different configuration than the one originally requested; mark
      // accordingly.
| compatibility |= IREE_HAL_BUFFER_COMPATIBILITY_LOW_PERFORMANCE; |
| } |
| } |
| |
| // We are now optimal. |
| params->type &= ~IREE_HAL_MEMORY_TYPE_OPTIMAL; |
| |
| // Guard against the corner case where the requested buffer size is 0. The application is unlikely |
| // to do anything when requesting a 0-byte buffer; but it can happen in real world use cases. So |
| // we should at least not crash. |
| if (*allocation_size == 0) *allocation_size = 4; |
| |
| // Align allocation sizes to 4 bytes so shaders operating on 32 bit types can act safely even on |
| // buffer ranges that are not naturally aligned. |
| *allocation_size = iree_host_align(*allocation_size, 4); |
| |
| return compatibility; |
| } |
| |
| // Returns the corresponding Metal resource options controlling storage modes, CPU caching modes, |
| // and hazard tracking modes for the given IREE HAL memory |type|. |
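//
// For example, a device-local + host-visible request without IREE_HAL_MEMORY_TYPE_HOST_CACHED on
// a discrete-GPU macOS device with tracked hazards resolves to
// MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined |
// MTLResourceHazardTrackingModeTracked.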
| static MTLResourceOptions iree_hal_metal_select_resource_options( |
| iree_hal_memory_type_t type, bool is_unified_memory, bool has_init_data, |
| iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode) { |
| MTLResourceOptions options; |
| |
| // Select a storage mode. There are four MTLStorageMode: |
| // * Shared: The resource is stored in system memory and is accessible to both the CPU and |
| // the GPU. |
| // * Managed: The CPU and GPU may maintain separate copies of the resource, and any changes |
| // must be explicitly synchronized. |
| // * Private: The resource can be accessed only by the GPU. |
| // * Memoryless: The resource's contents can be accessed only by the GPU and only exist |
| // temporarily during a render pass. |
| // macOS has all of the above; MTLStorageModeManaged is not available on iOS. |
| // |
  // The IREE HAL is modeled after explicit APIs like Vulkan. For buffers visible to both the host
  // and the device, we prefer the explicit variant (MTLStorageModeManaged) when possible because
  // it should be more performant.
| if (iree_all_bits_set(type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) { |
| if (iree_all_bits_set(type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) { |
| // Device local + host visible. |
      // iree_hal_metal_allocator_query_buffer_compatibility guarantees that we only fall into
      // this case on macOS devices without unified memory.
| #if defined(IREE_PLATFORM_MACOS) |
| IREE_ASSERT(!is_unified_memory); |
| options = MTLResourceStorageModeManaged; |
| #else |
| options = MTLResourceStorageModeShared; |
| #endif // IREE_PLATFORM_MACOS |
| } else { |
| // Device local + host invisible. |
      // On unified memory systems, switch to the Shared storage mode when there is initial data
      // so the contents can be uploaded directly from the CPU.
| options = (is_unified_memory && has_init_data) ? MTLResourceStorageModeShared |
| : MTLResourceStorageModePrivate; |
| } |
| } else { |
| if (iree_all_bits_set(type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) { |
| // Host local + device visible. |
| options = MTLResourceStorageModeShared; |
| } else { |
| // Host local + device invisible. |
| options = MTLResourceStorageModeShared; |
| } |
| } |
| |
| // Select a CPU cache mode. |
| if (iree_all_bits_set(type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) { |
| // The default CPU cache mode for the resource, which guarantees that read and write operations |
| // are executed in the expected order. |
| options |= MTLResourceCPUCacheModeDefaultCache; |
| } else { |
| // A write-combined CPU cache mode that is optimized for resources that the CPU writes into, but |
| // never reads. |
| options |= MTLResourceCPUCacheModeWriteCombined; |
| } |
| |
| options |= resource_tracking_mode == IREE_HAL_METAL_RESOURCE_HAZARD_TRACKING_MODE_TRACKED |
| ? MTLResourceHazardTrackingModeTracked |
| : MTLResourceHazardTrackingModeUntracked; |
| return options; |
| } |
| |
| static iree_status_t iree_hal_metal_allocator_allocate_buffer( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, |
| const iree_hal_buffer_params_t* IREE_RESTRICT params, iree_device_size_t allocation_size, |
| iree_const_byte_span_t initial_data, iree_hal_buffer_t** IREE_RESTRICT out_buffer) { |
| iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator); |
| IREE_TRACE_ZONE_BEGIN(z0); |
| IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, allocation_size); |
| |
| // Coerce options into those required by the current device. |
| iree_hal_buffer_params_t compat_params = *params; |
| iree_hal_buffer_compatibility_t compatibility = |
| iree_hal_metal_allocator_query_buffer_compatibility(base_allocator, &compat_params, |
| &allocation_size); |
| if (!iree_all_bits_set(compatibility, IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE)) { |
| #if IREE_STATUS_MODE |
| iree_bitfield_string_temp_t temp0, temp1, temp2; |
| iree_string_view_t memory_type_str = iree_hal_memory_type_format(params->type, &temp0); |
| iree_string_view_t usage_str = iree_hal_buffer_usage_format(params->usage, &temp1); |
| iree_string_view_t compatibility_str = |
| iree_hal_buffer_compatibility_format(compatibility, &temp2); |
| IREE_TRACE_ZONE_END(z0); |
| return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, |
| "allocator cannot allocate a buffer with the given parameters; " |
| "memory_type=%.*s, usage=%.*s, compatibility=%.*s", |
| (int)memory_type_str.size, memory_type_str.data, (int)usage_str.size, |
| usage_str.data, (int)compatibility_str.size, compatibility_str.data); |
| #else |
| IREE_TRACE_ZONE_END(z0); |
| return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, |
| "allocator cannot allocate a buffer with the given parameters"); |
| #endif // IREE_STATUS_MODE |
| } |
| |
| bool has_init_data = !iree_const_byte_span_is_empty(initial_data); |
| MTLResourceOptions options = |
| iree_hal_metal_select_resource_options(compat_params.type, allocator->is_unified_memory, |
| has_init_data, allocator->resource_tracking_mode); |
| if (has_init_data && iree_all_bits_set(options, MTLResourceStorageModePrivate)) { |
| IREE_TRACE_ZONE_END(z0); |
    // This should only happen on macOS devices without unified memory; on unified-memory devices
    // the compatibility query above already switched such buffers to the Shared storage mode.
| return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "private storage with initial data"); |
| } |
| |
  id<MTLBuffer> metal_buffer = nil;
  // With the Shared storage mode we can upload initial data directly via newBufferWithBytes.
  // Otherwise we create the buffer here and explicitly copy and synchronize its contents later.
  // Note that MTLResourceStorageModeShared is numerically zero, so we compare the masked storage
  // mode bits rather than testing individual bits.
  if (has_init_data &&
      (options & MTLResourceStorageModeMask) == MTLResourceStorageModeShared) {
    IREE_ASSERT_EQ(allocation_size, initial_data.data_length);
    metal_buffer = [allocator->device newBufferWithBytes:(void*)initial_data.data
                                                  length:initial_data.data_length
                                                 options:options]; // +1
  } else {
    metal_buffer = [allocator->device newBufferWithLength:allocation_size options:options]; // +1
  }
| if (!metal_buffer) { |
| IREE_TRACE_ZONE_END(z0); |
| return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, "unable to allocate buffer"); |
| } |
| iree_hal_buffer_t* buffer = NULL; |
| iree_status_t status = iree_hal_metal_buffer_wrap( |
| #if defined(IREE_PLATFORM_MACOS) |
| allocator->queue, |
| #endif // IREE_PLATFORM_MACOS |
| metal_buffer, base_allocator, compat_params.type, compat_params.access, compat_params.usage, |
| allocation_size, /*byte_offset=*/0, /*byte_length=*/allocation_size, |
| iree_hal_buffer_release_callback_null(), &buffer); // +1 |
| |
| #if defined(IREE_PLATFORM_MACOS) |
| if (iree_status_is_ok(status) && has_init_data && |
| !iree_all_bits_set(options, MTLResourceStorageModePrivate)) { |
| // Explicitly handle synchronization for Managed storage mode. |
| if (iree_all_bits_set(options, MTLResourceStorageModeManaged)) { |
| memcpy(metal_buffer.contents, initial_data.data, initial_data.data_length); |
| [metal_buffer didModifyRange:NSMakeRange(0, initial_data.data_length)]; |
| } |
| } |
| #endif // IREE_PLATFORM_MACOS |
| |
| if (iree_status_is_ok(status)) { |
| IREE_TRACE_ALLOC_NAMED(IREE_HAL_METAL_ALLOCATOR_ID, (void*)iree_hal_metal_buffer_handle(buffer), |
| allocation_size); |
| IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc( |
| &allocator->statistics, compat_params.type, allocation_size)); |
| *out_buffer = buffer; |
| } else { |
| if (buffer) iree_hal_buffer_release(buffer); |
| } |
| |
| [metal_buffer release]; // -1 |
| |
| IREE_TRACE_ZONE_END(z0); |
| return status; |
| } |
| |
| static void iree_hal_metal_allocator_deallocate_buffer( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, |
| iree_hal_buffer_t* IREE_RESTRICT base_buffer) { |
| iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator); |
| |
| IREE_TRACE_FREE_NAMED(IREE_HAL_METAL_ALLOCATOR_ID, |
| (void*)iree_hal_metal_buffer_handle(base_buffer)); |
| IREE_STATISTICS(iree_hal_allocator_statistics_record_free( |
| &allocator->statistics, iree_hal_buffer_memory_type(base_buffer), |
| iree_hal_buffer_allocation_size(base_buffer))); |
| |
| iree_hal_buffer_destroy(base_buffer); // -1 |
| } |
| |
| static iree_status_t iree_hal_metal_allocator_import_buffer( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, |
| const iree_hal_buffer_params_t* IREE_RESTRICT params, |
| iree_hal_external_buffer_t* IREE_RESTRICT external_buffer, |
| iree_hal_buffer_release_callback_t release_callback, |
| iree_hal_buffer_t** IREE_RESTRICT out_buffer) { |
| return iree_make_status(IREE_STATUS_UNAVAILABLE, "unsupported importing from external buffer"); |
| } |
| |
| static iree_status_t iree_hal_metal_allocator_export_buffer( |
| iree_hal_allocator_t* IREE_RESTRICT base_allocator, iree_hal_buffer_t* IREE_RESTRICT buffer, |
| iree_hal_external_buffer_type_t requested_type, |
| iree_hal_external_buffer_flags_t requested_flags, |
| iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) { |
| return iree_make_status(IREE_STATUS_UNAVAILABLE, "unsupported exporting to external buffer"); |
| } |
| |
| static const iree_hal_allocator_vtable_t iree_hal_metal_allocator_vtable = { |
| .destroy = iree_hal_metal_allocator_destroy, |
| .host_allocator = iree_hal_metal_allocator_host_allocator, |
| .trim = iree_hal_metal_allocator_trim, |
| .query_statistics = iree_hal_metal_allocator_query_statistics, |
| .query_buffer_compatibility = iree_hal_metal_allocator_query_buffer_compatibility, |
| .allocate_buffer = iree_hal_metal_allocator_allocate_buffer, |
| .deallocate_buffer = iree_hal_metal_allocator_deallocate_buffer, |
| .import_buffer = iree_hal_metal_allocator_import_buffer, |
| .export_buffer = iree_hal_metal_allocator_export_buffer, |
| }; |