[metal] Keep track of queue in buffer construction for macOS
For managed storage mode, we need to issue GPU commands to
explicitly make data visible to the CPU. This requires
a handle to an MTLCommandQueue. Instead of creating
a new one internally, pass the queue to the buffer at
construction time.
diff --git a/experimental/metal/direct_allocator.h b/experimental/metal/direct_allocator.h
index 5e3621f..ae39a18 100644
--- a/experimental/metal/direct_allocator.h
+++ b/experimental/metal/direct_allocator.h
@@ -17,19 +17,28 @@
extern "C" {
#endif // __cplusplus
-// Create a straightforward Metal allocator from the given |base_device| that
+// Creates a straightforward Metal allocator from the given |device| that
// performs allocations separately without caching or suballocation.
//
+// On macOS, we additionally need the command queue to encode commands to make
+// buffer contents visible to the CPU for managed storage type.
+//
// |out_allocator| must be released by the caller (see
// iree_hal_allocator_release).
iree_status_t iree_hal_metal_allocator_create(
- iree_hal_device_t* base_device, id<MTLDevice> device,
+ id<MTLDevice> device,
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue,
+#endif // IREE_PLATFORM_MACOS
iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode,
iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
-// Returns the underyling HAL device associated with the given |allocator|.
-const iree_hal_device_t* iree_hal_metal_allocator_device(
+#if defined(IREE_PLATFORM_MACOS)
+// Returns the underlying MTLCommandQueue associated with the given
+// |allocator|.
+id<MTLCommandQueue> iree_hal_metal_allocator_command_queue(
const iree_hal_allocator_t* allocator);
+#endif // IREE_PLATFORM_MACOS
#ifdef __cplusplus
} // extern "C"
diff --git a/experimental/metal/direct_allocator.m b/experimental/metal/direct_allocator.m
index 84b6137..9c58a4d 100644
--- a/experimental/metal/direct_allocator.m
+++ b/experimental/metal/direct_allocator.m
@@ -23,8 +23,11 @@
iree_hal_resource_t resource;
// The device that this allocator is attached to.
- iree_hal_device_t* base_device;
id<MTLDevice> device;
+ // macOS only: the command queue used to issue commands that make buffer contents visible to the CPU.
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue;
+#endif // IREE_PLATFORM_MACOS
bool is_unified_memory;
iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode;
@@ -48,10 +51,12 @@
}
iree_status_t iree_hal_metal_allocator_create(
- iree_hal_device_t* base_device, id<MTLDevice> device,
+ id<MTLDevice> device,
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue,
+#endif // IREE_PLATFORM_MACOS
iree_hal_metal_resource_hazard_tracking_mode_t resource_tracking_mode,
iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator) {
- IREE_ASSERT_ARGUMENT(base_device);
IREE_ASSERT_ARGUMENT(out_allocator);
IREE_TRACE_ZONE_BEGIN(z0);
@@ -61,8 +66,6 @@
if (iree_status_is_ok(status)) {
iree_hal_resource_initialize(&iree_hal_metal_allocator_vtable, &allocator->resource);
- allocator->base_device = base_device;
- iree_hal_device_retain(base_device);
allocator->device = [device retain]; // +1
allocator->is_unified_memory = [device hasUnifiedMemory];
allocator->resource_tracking_mode = resource_tracking_mode;
@@ -81,7 +84,6 @@
IREE_TRACE_ZONE_BEGIN(z0);
[allocator->device release]; // -1
- iree_hal_device_release(allocator->base_device);
iree_allocator_free(host_allocator, allocator);
IREE_TRACE_ZONE_END(z0);
@@ -93,11 +95,13 @@
return allocator->host_allocator;
}
-const iree_hal_device_t* iree_hal_metal_allocator_device(
+#if defined(IREE_PLATFORM_MACOS)
+id<MTLCommandQueue> iree_hal_metal_allocator_command_queue(
const iree_hal_allocator_t* base_allocator) {
const iree_hal_metal_allocator_t* allocator = (const iree_hal_metal_allocator_t*)base_allocator;
- return allocator->base_device;
+ return allocator->queue;
}
+#endif // IREE_PLATFORM_MACOS
static iree_status_t iree_hal_metal_allocator_trim(
iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
@@ -285,6 +289,9 @@
iree_hal_buffer_t* buffer = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_metal_buffer_wrap(
+#if defined(IREE_PLATFORM_MACOS)
+ allocator->queue,
+#endif // IREE_PLATFORM_MACOS
metal_buffer, base_allocator, compat_params.type, compat_params.access, compat_params.usage,
allocation_size, /*byte_offset=*/0,
/*byte_length=*/allocation_size, iree_hal_buffer_release_callback_null(), &buffer); // +1
diff --git a/experimental/metal/metal_buffer.h b/experimental/metal/metal_buffer.h
index 01145c4..b7f0b65 100644
--- a/experimental/metal/metal_buffer.h
+++ b/experimental/metal/metal_buffer.h
@@ -20,6 +20,9 @@
//
// |out_buffer| must be released by the caller (see iree_hal_buffer_release).
iree_status_t iree_hal_metal_buffer_wrap(
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue,
+#endif // IREE_PLATFORM_MACOS
id<MTLBuffer> metal_buffer, iree_hal_allocator_t* allocator,
iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
diff --git a/experimental/metal/metal_buffer.m b/experimental/metal/metal_buffer.m
index 8e927a1..5fa909e 100644
--- a/experimental/metal/metal_buffer.m
+++ b/experimental/metal/metal_buffer.m
@@ -18,6 +18,10 @@
typedef struct iree_hal_metal_buffer_t {
iree_hal_buffer_t base;
id<MTLBuffer> buffer;
+ // macOS only: the command queue used to issue commands that make buffer contents visible to the CPU.
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue;
+#endif // IREE_PLATFORM_MACOS
iree_hal_buffer_release_callback_t release_callback;
} iree_hal_metal_buffer_t;
@@ -35,6 +39,9 @@
}
iree_status_t iree_hal_metal_buffer_wrap(
+#if defined(IREE_PLATFORM_MACOS)
+ id<MTLCommandQueue> queue,
+#endif // IREE_PLATFORM_MACOS
id<MTLBuffer> metal_buffer, iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
iree_hal_memory_access_t allowed_access, iree_hal_buffer_usage_t allowed_usage,
iree_device_size_t allocation_size, iree_device_size_t byte_offset,
@@ -52,6 +59,9 @@
byte_offset, byte_length, memory_type, allowed_access, allowed_usage,
&iree_hal_metal_buffer_vtable, &buffer->base);
buffer->buffer = [metal_buffer retain]; // +1
+#if defined(IREE_PLATFORM_MACOS)
+ buffer->queue = queue;
+#endif // IREE_PLATFORM_MACOS
buffer->release_callback = release_callback;
*out_buffer = &buffer->base;
}
@@ -90,10 +100,7 @@
// and commit to the queue.
iree_hal_metal_buffer_t* buffer = iree_hal_metal_buffer_cast(base_buffer);
if (buffer->buffer.storageMode == MTLStorageModeManaged) {
- const iree_hal_device_t* device =
- iree_hal_metal_allocator_device(buffer->base.device_allocator);
- id<MTLCommandQueue> queue = iree_hal_metal_device_command_queue(device);
- id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
+ id<MTLCommandBuffer> command_buffer = [buffer->queue commandBuffer];
id<MTLBlitCommandEncoder> blitCommandEncoder = [command_buffer blitCommandEncoder];
[blitCommandEncoder synchronizeResource:buffer->buffer];
diff --git a/experimental/metal/metal_device.h b/experimental/metal/metal_device.h
index 656fd09..3348913 100644
--- a/experimental/metal/metal_device.h
+++ b/experimental/metal/metal_device.h
@@ -30,11 +30,6 @@
const iree_hal_metal_device_params_t* iree_hal_metal_device_params(
const iree_hal_device_t* device);
-// Returns the Metal command queue associated with the given |device|.
-// Note that right now we only support one command queue per Metal device.
-id<MTLCommandQueue> iree_hal_metal_device_command_queue(
- const iree_hal_device_t* device);
-
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/experimental/metal/metal_device.m b/experimental/metal/metal_device.m
index bcbeb37..191bd25 100644
--- a/experimental/metal/metal_device.m
+++ b/experimental/metal/metal_device.m
@@ -86,11 +86,6 @@
return &device->params;
}
-id<MTLCommandQueue> iree_hal_metal_device_command_queue(const iree_hal_device_t* base_device) {
- const iree_hal_metal_device_t* device = iree_hal_metal_device_const_cast(base_device);
- return device->queue;
-}
-
static iree_status_t iree_hal_metal_device_create_internal(
iree_string_view_t identifier, const iree_hal_metal_device_params_t* params,
id<MTLDevice> metal_device, iree_allocator_t host_allocator, iree_hal_device_t** out_device) {
@@ -99,7 +94,12 @@
iree_host_size_t total_size = iree_sizeof_struct(*device) + identifier.size;
IREE_RETURN_IF_ERROR(iree_allocator_malloc(host_allocator, total_size, (void**)&device));
- iree_status_t status = iree_hal_metal_allocator_create((iree_hal_device_t*)device, metal_device,
+ id<MTLCommandQueue> metal_queue = [metal_device newCommandQueue]; // +1
+
+ iree_status_t status = iree_hal_metal_allocator_create(metal_device,
+#if defined(IREE_PLATFORM_MACOS)
+ metal_queue,
+#endif // IREE_PLATFORM_MACOS
params->resource_hazard_tracking_mode,
host_allocator, &device->device_allocator);
@@ -125,8 +125,8 @@
iree_arena_block_pool_initialize(params->arena_block_size, host_allocator, &device->block_pool);
device->params = *params;
device->host_allocator = host_allocator;
- device->device = [metal_device retain]; // +1
- device->queue = [metal_device newCommandQueue]; // +1
+ device->device = [metal_device retain]; // +1
+ device->queue = metal_queue;
device->command_buffer_resource_reference_mode = params->command_buffer_resource_reference_mode;
device->builtin_executable = builtin_executable;
dispatch_queue_attr_t queue_attr = dispatch_queue_attr_make_with_qos_class(