Revert #19074 and #19082 to fix broken AMDGPU tests. (#19086)
Failing tests from the first PR:
https://github.com/iree-org/iree/actions/runs/11748241815/job/32733175285#step:8:482
The second PR was landed on top of the first, preventing a clean revert of the
first without also reverting the second.
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_device.c b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
index d5ed448..2ed014b 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_device.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
@@ -1171,8 +1171,6 @@
iree_hal_cuda_deferred_work_queue_device_interface_create_stream_command_buffer,
.submit_command_buffer =
iree_hal_cuda_deferred_work_queue_device_interface_submit_command_buffer,
- .async_alloc = NULL,
- .async_dealloc = NULL,
};
static const iree_hal_stream_tracing_device_interface_vtable_t
diff --git a/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h b/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
index 963784d..1a3fc50 100644
--- a/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
+++ b/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
@@ -73,7 +73,6 @@
hipMemPool_t, hipStream_t)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocManaged, hipDeviceptr_t *, size_t,
unsigned int)
-IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocAsync, void **, size_t, hipStream_t)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpy, void *, const void *, size_t,
hipMemcpyKind)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpyAsync, void *, const void *, size_t,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.c b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
index 041e011..1073ccc 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
@@ -111,10 +111,6 @@
IREE_TRACE_ZONE_END(z0);
}
-bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value) {
- return iree_hal_resource_is(base_value, &iree_hal_hip_allocator_vtable);
-}
-
static iree_allocator_t iree_hal_hip_allocator_host_allocator(
const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
iree_hal_hip_allocator_t* allocator =
@@ -594,53 +590,6 @@
}
}
-iree_status_t iree_hal_hip_allocator_alloc_async(
- iree_hal_allocator_t* base_allocator, hipStream_t stream,
- iree_hal_buffer_t* buffer) {
- iree_hal_hip_allocator_t* allocator =
- iree_hal_hip_allocator_cast(base_allocator);
-
- hipDeviceptr_t ptr = NULL;
- iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
- allocator->symbols,
- hipMallocAsync(&ptr, (size_t)iree_hal_buffer_allocation_size(buffer),
- stream),
- "hipMallocAsync");
- if (iree_status_is_ok(status)) {
- iree_hal_hip_buffer_set_device_pointer(buffer, ptr);
- IREE_TRACE_ALLOC_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)ptr,
- iree_hal_buffer_allocation_size(buffer));
- IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
- &allocator->statistics, iree_hal_buffer_memory_type(buffer),
- iree_hal_buffer_allocation_size(buffer)));
- } else {
- iree_hal_hip_buffer_set_allocation_empty(buffer);
- }
-
- return status;
-}
-
-iree_status_t iree_hal_hip_allocator_free_async(
- iree_hal_allocator_t* base_allocator, hipStream_t stream,
- iree_hal_buffer_t* buffer) {
- iree_hal_hip_allocator_t* allocator =
- iree_hal_hip_allocator_cast(base_allocator);
- hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
- if (!device_ptr) {
- return iree_ok_status();
- }
-
- IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
- allocator->symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync"));
- iree_hal_hip_buffer_set_allocation_empty(buffer);
-
- IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
- IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
- &allocator->statistics, iree_hal_buffer_memory_type(buffer),
- iree_hal_buffer_allocation_size(buffer)));
- return iree_ok_status();
-}
-
static const iree_hal_allocator_vtable_t iree_hal_hip_allocator_vtable = {
.destroy = iree_hal_hip_allocator_destroy,
.host_allocator = iree_hal_hip_allocator_host_allocator,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.h b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
index bb83ea1..6d51060 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
@@ -25,16 +25,6 @@
hipStream_t stream, iree_hal_hip_memory_pools_t* pools,
iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
-bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value);
-
-iree_status_t iree_hal_hip_allocator_alloc_async(
- iree_hal_allocator_t* base_allocator, hipStream_t stream,
- iree_hal_buffer_t* buffer);
-
-iree_status_t iree_hal_hip_allocator_free_async(iree_hal_allocator_t* allocator,
- hipStream_t stream,
- iree_hal_buffer_t* buffer);
-
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.c b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
index 46e768a..fb318ba 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
@@ -11,7 +11,6 @@
#include <string.h>
#include "iree/base/api.h"
-#include "iree/base/internal/synchronization.h"
#include "iree/base/tracing.h"
typedef struct iree_hal_hip_buffer_t {
@@ -20,9 +19,6 @@
void* host_ptr;
hipDeviceptr_t device_ptr;
iree_hal_buffer_release_callback_t release_callback;
- iree_slim_mutex_t device_ptr_lock;
- iree_notification_t device_ptr_notification;
- bool empty;
} iree_hal_hip_buffer_t;
static const iree_hal_buffer_vtable_t iree_hal_hip_buffer_vtable;
@@ -69,9 +65,6 @@
buffer->host_ptr = host_ptr;
buffer->device_ptr = device_ptr;
buffer->release_callback = release_callback;
- buffer->empty = false;
- iree_slim_mutex_initialize(&buffer->device_ptr_lock);
- iree_notification_initialize(&buffer->device_ptr_notification);
*out_buffer = &buffer->base;
}
@@ -79,26 +72,6 @@
return status;
}
-void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* base_buffer,
- hipDeviceptr_t pointer) {
- iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
- IREE_ASSERT(buffer->device_ptr == NULL,
- "Cannot set a device_ptr to a buffer that already has one");
- iree_slim_mutex_lock(&buffer->device_ptr_lock);
- buffer->device_ptr = pointer;
- iree_slim_mutex_unlock(&buffer->device_ptr_lock);
- iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
-}
-
-void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* base_buffer) {
- iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
- iree_slim_mutex_lock(&buffer->device_ptr_lock);
- buffer->empty = true;
- buffer->device_ptr = NULL;
- iree_slim_mutex_unlock(&buffer->device_ptr_lock);
- iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
-}
-
static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
iree_allocator_t host_allocator = base_buffer->host_allocator;
@@ -107,8 +80,6 @@
buffer->release_callback.fn(buffer->release_callback.user_data,
base_buffer);
}
- iree_slim_mutex_deinitialize(&buffer->device_ptr_lock);
- iree_notification_deinitialize(&buffer->device_ptr_notification);
iree_allocator_free(host_allocator, buffer);
IREE_TRACE_ZONE_END(z0);
}
@@ -172,20 +143,10 @@
return buffer->type;
}
-static bool iree_hal_hip_buffer_has_device_ptr(void* arg) {
- iree_hal_hip_buffer_t* buffer = (iree_hal_hip_buffer_t*)arg;
- iree_slim_mutex_lock(&buffer->device_ptr_lock);
- bool has_ptr_or_error = buffer->device_ptr || buffer->empty;
- iree_slim_mutex_unlock(&buffer->device_ptr_lock);
- return has_ptr_or_error;
-}
-
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
- iree_hal_buffer_t* base_buffer) {
- iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
- iree_notification_await(&buffer->device_ptr_notification,
- iree_hal_hip_buffer_has_device_ptr, buffer,
- iree_infinite_timeout());
+ const iree_hal_buffer_t* base_buffer) {
+ const iree_hal_hip_buffer_t* buffer =
+ iree_hal_hip_buffer_const_cast(base_buffer);
return buffer->device_ptr;
}
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.h b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
index 4264b54..8fe1609 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
@@ -49,16 +49,8 @@
// Returns the HIP base pointer for the given |buffer|.
// This is the entire allocated_buffer and must be offset by the buffer
// byte_offset and byte_length when used.
-hipDeviceptr_t iree_hal_hip_buffer_device_pointer(iree_hal_buffer_t* buffer);
-
-// Sets the HIP base pointer for the given |buffer|.
-// This is the entire allocated_buffer and must be offset by the buffer
-// byte_offset and byte_length when used.
-void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* buffer,
- hipDeviceptr_t pointer);
-
-// Marks the buffer as having an intentionally empty allocation.
-void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* buffer);
+hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
+ const iree_hal_buffer_t* buffer);
// Returns the HIP host pointer for the given |buffer|, if available.
void* iree_hal_hip_buffer_host_pointer(const iree_hal_buffer_t* buffer);
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index c409eaa..7f42e8d 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -19,7 +19,6 @@
#include "iree/hal/drivers/hip/event_semaphore.h"
#include "iree/hal/drivers/hip/graph_command_buffer.h"
#include "iree/hal/drivers/hip/hip_allocator.h"
-#include "iree/hal/drivers/hip/hip_buffer.h"
#include "iree/hal/drivers/hip/memory_pools.h"
#include "iree/hal/drivers/hip/nop_executable_cache.h"
#include "iree/hal/drivers/hip/rccl_channel.h"
@@ -88,9 +87,6 @@
iree_hal_channel_provider_t* channel_provider;
} iree_hal_hip_device_t;
-static iree_hal_hip_device_t* iree_hal_hip_device_cast(
- iree_hal_device_t* base_value);
-
static const iree_hal_device_vtable_t iree_hal_hip_device_vtable;
static const iree_hal_deferred_work_queue_device_interface_vtable_t
iree_hal_hip_deferred_work_queue_device_interface_vtable;
@@ -170,7 +166,6 @@
return IREE_HIP_RESULT_TO_STATUS(device_interface->hip_symbols,
hipEventSynchronize((hipEvent_t)event));
}
-
static iree_status_t
iree_hal_hip_deferred_work_queue_device_interface_destroy_native_event(
iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
@@ -261,45 +256,6 @@
return status;
}
-static iree_status_t
-iree_hal_hip_deferred_work_queue_device_interface_async_alloc(
- iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
- iree_hal_buffer_t* buffer) {
- iree_hal_hip_deferred_work_queue_device_interface_t* device_interface =
- (iree_hal_hip_deferred_work_queue_device_interface_t*)
- base_device_interface;
- iree_hal_hip_device_t* device =
- iree_hal_hip_device_cast(device_interface->device);
- if (device->supports_memory_pools) {
- return iree_hal_hip_memory_pools_allocate_pointer(
- &device->memory_pools, buffer, device->hip_dispatch_stream,
- iree_hal_buffer_allocation_size(buffer));
- }
-
- return iree_hal_hip_allocator_alloc_async(
- iree_hal_device_allocator(device_interface->device),
- device->hip_dispatch_stream, buffer);
-}
-
-// Asynchronously frees a buffer.
-static iree_status_t
-iree_hal_hip_deferred_work_queue_device_interface_async_dealloc(
- iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
- iree_hal_buffer_t* buffer) {
- iree_hal_hip_deferred_work_queue_device_interface_t* device_interface =
- (iree_hal_hip_deferred_work_queue_device_interface_t*)
- base_device_interface;
- iree_hal_hip_device_t* device =
- iree_hal_hip_device_cast(device_interface->device);
- if (device->supports_memory_pools) {
- return iree_hal_hip_memory_pools_deallocate(
- &device->memory_pools, device->hip_dispatch_stream, buffer);
- }
- return iree_hal_hip_allocator_free_async(
- iree_hal_device_allocator(device_interface->device),
- device->hip_dispatch_stream, buffer);
-}
-
typedef struct iree_hal_hip_tracing_device_interface_t {
iree_hal_stream_tracing_device_interface_t base;
hipDevice_t device;
@@ -966,34 +922,6 @@
return IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_ONLY;
}
-static iree_status_t iree_hal_hip_device_pepare_async_alloc(
- iree_hal_hip_device_t* device, iree_hal_buffer_params_t params,
- iree_device_size_t allocation_size,
- iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
- IREE_TRACE_ZONE_BEGIN(z0);
- IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)allocation_size);
-
- iree_hal_buffer_params_canonicalize(¶ms);
-
- iree_hal_buffer_t* buffer = NULL;
- iree_status_t status = iree_hal_hip_buffer_wrap(
- device->device_allocator, params.type, params.access, params.usage,
- allocation_size, /*byte_offset=*/0,
- /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
- /*device_ptr=*/NULL, /*host_ptr=*/NULL,
- iree_hal_buffer_release_callback_null(), device->host_allocator, &buffer);
-
- if (iree_status_is_ok(status)) {
- *out_buffer = buffer;
- } else if (buffer) {
- iree_hal_hip_buffer_set_allocation_empty(buffer);
- iree_hal_buffer_release(buffer);
- }
-
- IREE_TRACE_ZONE_END(z0);
- return status;
-}
-
// TODO: implement multiple streams; today we only have one and queue_affinity
// is ignored.
// TODO: implement proper semaphores in HIP to ensure ordering and avoid
@@ -1007,46 +935,6 @@
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
- if (device->supports_memory_pools &&
- !iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
- iree_hal_buffer_t* buffer = NULL;
-
- IREE_RETURN_IF_ERROR(iree_hal_hip_memory_pools_prepare_buffer(
- &device->memory_pools, device->hip_dispatch_stream, pool, params,
- allocation_size, &buffer));
-
- iree_status_t status = iree_hal_deferred_work_queue_enqueue_alloc(
- device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
- if (iree_status_is_ok(status)) {
- *out_buffer = buffer;
- } else {
- iree_hal_hip_buffer_set_allocation_empty(buffer);
- iree_hal_resource_release(&buffer->resource);
- }
- return status;
- } else if (!iree_all_bits_set(params.type,
- IREE_HAL_MEMORY_TYPE_HOST_VISIBLE) &&
- iree_hal_hip_allocator_isa(
- iree_hal_device_allocator(base_device))) {
- iree_hal_buffer_t* buffer = NULL;
-
- IREE_RETURN_IF_ERROR(iree_hal_hip_device_pepare_async_alloc(
- device, params, allocation_size, &buffer));
-
- iree_status_t status = iree_hal_deferred_work_queue_enqueue_alloc(
- device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
- if (iree_status_is_ok(status)) {
- status = iree_hal_deferred_work_queue_issue(device->work_queue);
- }
- if (iree_status_is_ok(status)) {
- *out_buffer = buffer;
- } else {
- iree_hal_hip_buffer_set_allocation_empty(buffer);
- iree_hal_resource_release(&buffer->resource);
- }
- return status;
- }
-
// NOTE: block on the semaphores here; we could avoid this by properly
// sequencing device work with semaphores. The HIP HAL is not currently
// asynchronous.
@@ -1057,9 +945,17 @@
// exhaustion but the error may be deferred until a later synchronization.
// If pools are not supported we allocate a buffer as normal from whatever
// allocator is set on the device.
- iree_status_t status =
- iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(base_device),
- params, allocation_size, out_buffer);
+ iree_status_t status = iree_ok_status();
+ if (device->supports_memory_pools &&
+ !iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+ status = iree_hal_hip_memory_pools_allocate(
+ &device->memory_pools, device->hip_dispatch_stream, pool, params,
+ allocation_size, out_buffer);
+ } else {
+ status = iree_hal_allocator_allocate_buffer(
+ iree_hal_device_allocator(base_device), params, allocation_size,
+ out_buffer);
+ }
// Only signal if not returning a synchronous error - synchronous failure
// indicates that the stream is unchanged (it's not really since we waited
@@ -1080,10 +976,6 @@
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* buffer) {
iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
- if (iree_hal_hip_allocator_isa(iree_hal_device_allocator(base_device))) {
- return iree_hal_deferred_work_queue_enqueue_dealloc(
- device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
- }
// NOTE: block on the semaphores here; we could avoid this by properly
// sequencing device work with semaphores. The HIP HAL is not currently
@@ -1276,10 +1168,6 @@
iree_hal_hip_deferred_work_queue_device_interface_create_stream_command_buffer,
.submit_command_buffer =
iree_hal_hip_deferred_work_queue_device_interface_submit_command_buffer,
- .async_alloc =
- iree_hal_hip_deferred_work_queue_device_interface_async_alloc,
- .async_dealloc =
- iree_hal_hip_deferred_work_queue_device_interface_async_dealloc,
};
static const iree_hal_stream_tracing_device_interface_vtable_t
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.c b/runtime/src/iree/hal/drivers/hip/memory_pools.c
index 0258fa0..89e27fa 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.c
@@ -202,41 +202,13 @@
IREE_TRACE_ZONE_BEGIN(z0);
hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
- if (device_ptr) {
- IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFree(device_ptr));
- }
+ IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFree(device_ptr));
iree_hal_hip_memory_pool_track_free(pools, buffer);
IREE_TRACE_ZONE_END(z0);
}
-iree_status_t iree_hal_hip_memory_pools_allocate_pointer(
- iree_hal_hip_memory_pools_t* pools, iree_hal_buffer_t* buffer,
- hipStream_t stream, iree_device_size_t allocation_size) {
- // TODO: more pools and better selection; this is coarsely deciding between
- // only device local (variables, constants, transients) and other (staging,
- // external) but could use more buffer properties (including usage/export
- // flags) to better isolate the different usage patterns and keep the pools
- // operating with reasonable limits. We should be using the |pool| arg.
- hipMemPool_t memory_pool =
- iree_all_bits_set(iree_hal_buffer_memory_type(buffer),
- IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)
- ? pools->device_local
- : pools->other;
-
- hipDeviceptr_t device_ptr = NULL;
- IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
- pools->hip_symbols,
- hipMallocFromPoolAsync(&device_ptr, (size_t)allocation_size, memory_pool,
- stream),
- "hipMallocFromPoolAsync"));
-
- iree_hal_hip_buffer_set_device_pointer(buffer, device_ptr);
- iree_hal_hip_memory_pool_track_alloc(pools, buffer);
- return iree_ok_status();
-}
-
-iree_status_t iree_hal_hip_memory_pools_prepare_buffer(
+iree_status_t iree_hal_hip_memory_pools_allocate(
iree_hal_hip_memory_pools_t* pools, hipStream_t stream,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
@@ -246,27 +218,49 @@
iree_hal_buffer_params_canonicalize(¶ms);
+ // TODO: more pools and better selection; this is coarsely deciding between
+ // only device local (variables, constants, transients) and other (staging,
+ // external) but could use more buffer properties (including usage/export
+ // flags) to better isolate the different usage patterns and keep the pools
+ // operating with reasonable limits. We should be using the |pool| arg.
+ hipMemPool_t memory_pool =
+ iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)
+ ? pools->device_local
+ : pools->other;
+
+ hipDeviceptr_t device_ptr = NULL;
+ iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
+ pools->hip_symbols,
+ hipMallocFromPoolAsync(&device_ptr, (size_t)allocation_size, memory_pool,
+ stream),
+ "hipMallocFromPoolAsync");
+
+ // Wrap the allocated HIP buffer in a HAL buffer.
// NOTE: we don't provide a device allocator because we didn't allocate from
// one and instead we use a release callback to perform the free if the user
// doesn't dealloca the buffer.
iree_hal_buffer_t* buffer = NULL;
- iree_hal_buffer_release_callback_t release_callback = {
- .fn = iree_hal_hip_async_buffer_release_callback,
- .user_data = pools,
- };
- iree_status_t status = iree_hal_hip_buffer_wrap(
- /*device_allocator=*/NULL, params.type, params.access, params.usage,
- allocation_size, /*byte_offset=*/0,
- /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
- /*device_ptr*/ NULL, /*host_ptr=*/NULL, release_callback,
- pools->host_allocator, &buffer);
+ if (iree_status_is_ok(status)) {
+ iree_hal_buffer_release_callback_t release_callback = {
+ .fn = iree_hal_hip_async_buffer_release_callback,
+ .user_data = pools,
+ };
+ status = iree_hal_hip_buffer_wrap(
+ /*device_allocator=*/NULL, params.type, params.access, params.usage,
+ allocation_size, /*byte_offset=*/0,
+ /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
+ device_ptr, /*host_ptr=*/NULL, release_callback, pools->host_allocator,
+ &buffer);
+ }
if (iree_status_is_ok(status)) {
// Update statistics (note that it may not yet be accurate).
+ iree_hal_hip_memory_pool_track_alloc(pools, buffer);
*out_buffer = buffer;
} else if (buffer) {
- iree_hal_hip_buffer_set_allocation_empty(buffer);
iree_hal_buffer_release(buffer);
+ } else {
+ IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFreeAsync(device_ptr, stream));
}
IREE_TRACE_ZONE_END(z0);
@@ -288,10 +282,8 @@
if (iree_hal_hip_buffer_type(buffer) == IREE_HAL_HIP_BUFFER_TYPE_ASYNC) {
// Try to schedule the buffer for freeing.
hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
- if (device_ptr) {
- status = IREE_HIP_RESULT_TO_STATUS(
- pools->hip_symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync");
- }
+ status = IREE_HIP_RESULT_TO_STATUS(
+ pools->hip_symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync");
if (iree_status_is_ok(status)) {
// Drop the release callback so that we don't try to double-free the
// buffer. Note that we only do this if the HIP free succeeded as
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.h b/runtime/src/iree/hal/drivers/hip/memory_pools.h
index c505bbe..e39370a 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.h
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.h
@@ -63,15 +63,9 @@
iree_hal_hip_memory_pools_t* pools,
const iree_hal_hip_memory_pooling_params_t* pooling_params);
-iree_status_t iree_hal_hip_memory_pools_allocate_pointer(
- iree_hal_hip_memory_pools_t* pools, iree_hal_buffer_t* buffer,
- hipStream_t stream, iree_device_size_t allocation_size);
-
-// Prepares a buffer to be allocated from the given pool.
-// It does no actual allocations, they must happen on another thread.
-// Any calls to get the device_ptr from the buffer will
-// result in a wait until the allocation is available.
-iree_status_t iree_hal_hip_memory_pools_prepare_buffer(
+// Asynchronously allocates a buffer from an appropriate pool.
+// The allocation will be stream-ordered on |stream|.
+iree_status_t iree_hal_hip_memory_pools_allocate(
iree_hal_hip_memory_pools_t* pools, hipStream_t stream,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.c b/runtime/src/iree/hal/utils/deferred_work_queue.c
index c783bbe..e41fe35 100644
--- a/runtime/src/iree/hal/utils/deferred_work_queue.c
+++ b/runtime/src/iree/hal/utils/deferred_work_queue.c
@@ -19,30 +19,14 @@
// The maximal number of events a command buffer can wait on.
#define IREE_HAL_MAX_WAIT_EVENT_COUNT 32
-#define IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS 0
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-IREE_TRACE(static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME =
- "iree_hal_work_queue_pending");
-IREE_TRACE(static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME =
- "iree_hal_work_queue_ready");
-IREE_TRACE(
- static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME =
- "iree_hal_work_queue_pending_alloc");
-IREE_TRACE(static const char*
- IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME =
- "iree_hal_work_queue_pending_dealloc");
-#endif // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
//===----------------------------------------------------------------------===//
// Queue action
//===----------------------------------------------------------------------===//
typedef enum iree_hal_deferred_work_queue_action_kind_e {
IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION,
- IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC,
- IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC,
- IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX =
- IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC,
+ // TODO: Add support for queue alloca and dealloca.
} iree_hal_deferred_work_queue_action_kind_t;
typedef enum iree_hal_deferred_work_queue_action_state_e {
@@ -83,12 +67,6 @@
iree_hal_command_buffer_t** command_buffers;
iree_hal_buffer_binding_table_t* binding_tables;
} execution;
- struct {
- iree_hal_buffer_t* buffer;
- } alloc;
- struct {
- iree_hal_buffer_t* buffer;
- } dealloc;
} payload;
// Resource set to retain all associated resources by the payload.
@@ -523,21 +501,6 @@
IREE_ASSERT_ARGUMENT(out_actions);
IREE_TRACE_ZONE_BEGIN(z0);
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
- IREE_TRACE_SET_PLOT_TYPE(IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME,
- IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true,
- /*fill=*/true, /*color=*/0);
- IREE_TRACE_SET_PLOT_TYPE(IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME,
- IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true,
- /*fill=*/true, /*color=*/0);
- IREE_TRACE_SET_PLOT_TYPE(
- IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME,
- IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true, /*fill=*/true, /*color=*/0);
- IREE_TRACE_SET_PLOT_TYPE(
- IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME,
- IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true, /*fill=*/true, /*color=*/0);
-#endif // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-
iree_hal_deferred_work_queue_t* actions = NULL;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_allocator_malloc(host_allocator, sizeof(*actions),
@@ -849,140 +812,6 @@
return status;
}
-static iree_status_t iree_hal_deferred_work_queue_enqueue_buffer_operation(
- iree_hal_deferred_work_queue_t* actions,
- iree_hal_deferred_work_queue_action_kind_t kind,
- const iree_hal_semaphore_list_t wait_semaphore_list,
- const iree_hal_semaphore_list_t signal_semaphore_list,
- iree_hal_buffer_t* buffer) {
- IREE_ASSERT_ARGUMENT(actions);
- IREE_ASSERT_ARGUMENT(buffer);
- IREE_TRACE_ZONE_BEGIN(z0);
-
- // Embed captured tables in the action allocation.
- iree_hal_deferred_work_queue_action_t* action = NULL;
- const iree_host_size_t wait_semaphore_list_size =
- wait_semaphore_list.count * sizeof(*wait_semaphore_list.semaphores) +
- wait_semaphore_list.count * sizeof(*wait_semaphore_list.payload_values);
- const iree_host_size_t signal_semaphore_list_size =
- signal_semaphore_list.count * sizeof(*signal_semaphore_list.semaphores) +
- signal_semaphore_list.count *
- sizeof(*signal_semaphore_list.payload_values);
-
- const iree_host_size_t total_action_size =
- sizeof(*action) + wait_semaphore_list_size + signal_semaphore_list_size;
-
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_allocator_malloc(actions->host_allocator, total_action_size,
- (void**)&action));
- uint8_t* action_ptr = (uint8_t*)action + sizeof(*action);
-
- action->owning_actions = actions;
- action->device_interface = actions->device_interface;
- action->state = IREE_HAL_QUEUE_ACTION_STATE_ALIVE;
- action->cleanup_callback = NULL;
- action->callback_user_data = NULL;
- action->kind = kind;
-
- // Initialize scratch fields.
- action->event_count = 0;
- action->is_pending = true;
-
- // Copy wait list for later access.
- action->wait_semaphore_list.count = wait_semaphore_list.count;
- action->wait_semaphore_list.semaphores = (iree_hal_semaphore_t**)action_ptr;
- memcpy(action->wait_semaphore_list.semaphores, wait_semaphore_list.semaphores,
- wait_semaphore_list.count * sizeof(*wait_semaphore_list.semaphores));
- action->wait_semaphore_list.payload_values =
- (uint64_t*)(action_ptr + wait_semaphore_list.count *
- sizeof(*wait_semaphore_list.semaphores));
- memcpy(
- action->wait_semaphore_list.payload_values,
- wait_semaphore_list.payload_values,
- wait_semaphore_list.count * sizeof(*wait_semaphore_list.payload_values));
- action_ptr += wait_semaphore_list_size;
-
- // Copy signal list for later access.
- action->signal_semaphore_list.count = signal_semaphore_list.count;
- action->signal_semaphore_list.semaphores = (iree_hal_semaphore_t**)action_ptr;
- memcpy(
- action->signal_semaphore_list.semaphores,
- signal_semaphore_list.semaphores,
- signal_semaphore_list.count * sizeof(*signal_semaphore_list.semaphores));
- action->signal_semaphore_list.payload_values =
- (uint64_t*)(action_ptr + signal_semaphore_list.count *
- sizeof(*signal_semaphore_list.semaphores));
- memcpy(action->signal_semaphore_list.payload_values,
- signal_semaphore_list.payload_values,
- signal_semaphore_list.count *
- sizeof(*signal_semaphore_list.payload_values));
- action_ptr += signal_semaphore_list_size;
-
- // Copy the execution resources for later access.
- action->payload.alloc.buffer = buffer;
-
- // Retain all command buffers and semaphores.
- iree_status_t status = iree_hal_resource_set_allocate(actions->block_pool,
- &action->resource_set);
- if (iree_status_is_ok(status)) {
- status = iree_hal_resource_set_insert(action->resource_set,
- wait_semaphore_list.count,
- wait_semaphore_list.semaphores);
- }
- if (iree_status_is_ok(status)) {
- status = iree_hal_resource_set_insert(action->resource_set,
- signal_semaphore_list.count,
- signal_semaphore_list.semaphores);
- }
- if (iree_status_is_ok(status)) {
- status = iree_hal_resource_set_insert(action->resource_set, 1, &buffer);
- }
-
- if (iree_status_is_ok(status)) {
- // Now everything is okay and we can enqueue the action.
- iree_slim_mutex_lock(&actions->action_mutex);
- if (actions->exit_requested) {
- status = iree_make_status(
- IREE_STATUS_ABORTED,
- "can not issue more executions, exit already requested");
- iree_hal_deferred_work_queue_action_fail_locked(action, status);
- } else {
- iree_hal_deferred_work_queue_action_list_push_back(&actions->action_list,
- action);
- // One work item is the callback that makes it across from the
- // completion thread.
- actions->pending_work_items_count += 1;
- }
- iree_slim_mutex_unlock(&actions->action_mutex);
- } else {
- iree_hal_resource_set_free(action->resource_set);
- iree_allocator_free(actions->host_allocator, action);
- }
-
- IREE_TRACE_ZONE_END(z0);
- return status;
-}
-
-iree_status_t iree_hal_deferred_work_queue_enqueue_alloc(
- iree_hal_deferred_work_queue_t* actions,
- const iree_hal_semaphore_list_t wait_semaphore_list,
- const iree_hal_semaphore_list_t signal_semaphore_list,
- iree_hal_buffer_t* buffer) {
- return iree_hal_deferred_work_queue_enqueue_buffer_operation(
- actions, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC, wait_semaphore_list,
- signal_semaphore_list, buffer);
-}
-
-iree_status_t iree_hal_deferred_work_queue_enqueue_dealloc(
- iree_hal_deferred_work_queue_t* actions,
- const iree_hal_semaphore_list_t wait_semaphore_list,
- const iree_hal_semaphore_list_t signal_semaphore_list,
- iree_hal_buffer_t* buffer) {
- return iree_hal_deferred_work_queue_enqueue_buffer_operation(
- actions, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC, wait_semaphore_list,
- signal_semaphore_list, buffer);
-}
-
// Does not consume |status|.
static void iree_hal_deferred_work_queue_fail_status_locked(
iree_hal_deferred_work_queue_t* actions, iree_status_t status) {
@@ -1084,7 +913,7 @@
IREE_TRACE_ZONE_BEGIN(z0);
iree_hal_deferred_work_queue_action_t* action =
(iree_hal_deferred_work_queue_action_t*)user_data;
- IREE_ASSERT_LE(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX);
+ IREE_ASSERT_EQ(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION);
IREE_ASSERT_EQ(action->state, IREE_HAL_QUEUE_ACTION_STATE_ALIVE);
if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
iree_hal_deferred_work_queue_action_fail(action, status);
@@ -1112,7 +941,7 @@
// Issues the given kernel dispatch |action| to the GPU.
static iree_status_t iree_hal_deferred_work_queue_issue_execution(
iree_hal_deferred_work_queue_action_t* action) {
- IREE_ASSERT_LE(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX);
+ IREE_ASSERT_EQ(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION);
IREE_ASSERT_EQ(action->is_pending, false);
iree_hal_deferred_work_queue_t* actions = action->owning_actions;
iree_hal_deferred_work_queue_device_interface_t* device_interface =
@@ -1129,75 +958,56 @@
device_interface, action->wait_events[i]));
}
- switch (action->kind) {
- case IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION: {
- // Then launch all command buffers to the dispatch stream.
- IREE_TRACE_ZONE_BEGIN(z_dispatch_command_buffers);
- IREE_TRACE_ZONE_APPEND_TEXT(z_dispatch_command_buffers,
- "dispatch_command_buffers");
+ // Then launch all command buffers to the dispatch stream.
+ IREE_TRACE_ZONE_BEGIN(z_dispatch_command_buffers);
+ IREE_TRACE_ZONE_APPEND_TEXT(z_dispatch_command_buffers,
+ "dispatch_command_buffers");
- for (iree_host_size_t i = 0; i < action->payload.execution.count; ++i) {
- iree_hal_command_buffer_t* command_buffer =
- action->payload.execution.command_buffers[i];
- iree_hal_buffer_binding_table_t binding_table =
- action->payload.execution.binding_tables
- ? action->payload.execution.binding_tables[i]
- : iree_hal_buffer_binding_table_empty();
- if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
- iree_hal_command_buffer_t* stream_command_buffer = NULL;
- iree_hal_command_buffer_mode_t mode =
- iree_hal_command_buffer_mode(command_buffer) |
- IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
- // NOTE: we need to validate if a binding table is provided as the
- // bindings were not known when it was originally recorded.
- (iree_hal_buffer_binding_table_is_empty(binding_table)
- ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
- : 0);
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, device_interface->vtable->create_stream_command_buffer(
- device_interface, mode, IREE_HAL_COMMAND_CATEGORY_ANY,
- &stream_command_buffer))
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_hal_resource_set_insert(action->resource_set, 1,
- &stream_command_buffer));
-
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_hal_deferred_command_buffer_apply(
- command_buffer, stream_command_buffer, binding_table));
- command_buffer = stream_command_buffer;
- } else {
- iree_hal_resource_retain(command_buffer);
- }
-
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, device_interface->vtable->submit_command_buffer(
- device_interface, command_buffer));
-
- // The stream_command_buffer is going to be retained by
- // the action->resource_set and deleted after the action
- // completes.
- iree_hal_resource_release(command_buffer);
- }
-
- IREE_TRACE_ZONE_END(z_dispatch_command_buffers);
- break;
- }
- case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC: {
- IREE_TRACE_ZONE_APPEND_TEXT(z0, "queue_alloc");
+ for (iree_host_size_t i = 0; i < action->payload.execution.count; ++i) {
+ iree_hal_command_buffer_t* command_buffer =
+ action->payload.execution.command_buffers[i];
+ iree_hal_buffer_binding_table_t binding_table =
+ action->payload.execution.binding_tables
+ ? action->payload.execution.binding_tables[i]
+ : iree_hal_buffer_binding_table_empty();
+ if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
+ iree_hal_command_buffer_t* stream_command_buffer = NULL;
+ iree_hal_command_buffer_mode_t mode =
+ iree_hal_command_buffer_mode(command_buffer) |
+ IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+ // NOTE: we need to validate if a binding table is provided as the
+ // bindings were not known when it was originally recorded.
+ (iree_hal_buffer_binding_table_is_empty(binding_table)
+ ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
+ : 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, device_interface->vtable->async_alloc(
- device_interface, action->payload.alloc.buffer));
- break;
- }
- case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC: {
- IREE_TRACE_ZONE_APPEND_TEXT(z0, "queue_dealloc");
+ z0, device_interface->vtable->create_stream_command_buffer(
+ device_interface, mode, IREE_HAL_COMMAND_CATEGORY_ANY,
+ &stream_command_buffer))
IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, device_interface->vtable->async_dealloc(
- device_interface, action->payload.dealloc.buffer));
- break;
+ z0, iree_hal_resource_set_insert(action->resource_set, 1,
+ &stream_command_buffer));
+
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, iree_hal_deferred_command_buffer_apply(
+ command_buffer, stream_command_buffer, binding_table));
+ command_buffer = stream_command_buffer;
+ } else {
+ iree_hal_resource_retain(command_buffer);
}
+
+ IREE_RETURN_AND_END_ZONE_IF_ERROR(
+ z0, device_interface->vtable->submit_command_buffer(device_interface,
+ command_buffer));
+
+ // The stream_command_buffer is going to be retained by
+ // the action->resource_set and deleted after the action
+ // completes.
+ iree_hal_resource_release(command_buffer);
}
+ IREE_TRACE_ZONE_END(z_dispatch_command_buffers);
+
iree_hal_deferred_work_queue_native_event_t completion_event = NULL;
// Last record event signals in the dispatch stream.
for (iree_host_size_t i = 0; i < action->signal_semaphore_list.count; ++i) {
@@ -1284,12 +1094,6 @@
return iree_ok_status();
}
- IREE_TRACE_ZONE_BEGIN(z1);
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
- IREE_TRACE(uint32_t num_pending = 0; uint32_t num_pending_alloc = 0;
- uint32_t num_pending_dealloc = 0; uint32_t num_ready = 0;);
-#endif // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-
iree_status_t status = iree_ok_status();
// Scan through the list and categorize actions into pending and ready lists.
while (!iree_hal_deferred_work_queue_action_list_is_empty(
@@ -1381,44 +1185,12 @@
}
if (action->is_pending) {
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
- IREE_TRACE({
- ++num_pending;
- switch (action->kind) {
- case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC:
- ++num_pending_alloc;
- break;
- case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC:
- ++num_pending_dealloc;
- break;
- default:
- break;
- }
- });
-#endif // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
iree_hal_deferred_work_queue_action_list_push_back(&pending_list, action);
} else {
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
- IREE_TRACE(++num_ready;);
-#endif // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
iree_hal_deferred_work_queue_action_list_push_back(&ready_list, action);
}
}
- IREE_TRACE_ZONE_END(z1);
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
- IREE_TRACE_PLOT_VALUE_I64(IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME,
- num_pending);
- IREE_TRACE_PLOT_VALUE_I64(IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME,
- num_ready);
- IREE_TRACE_PLOT_VALUE_I64(
- IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME,
- num_pending_alloc);
- IREE_TRACE_PLOT_VALUE_I64(
- IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME,
- num_pending_dealloc);
-#endif
-
// Preserve pending timepoints.
actions->action_list = pending_list;
@@ -1553,10 +1325,8 @@
IREE_TRACE_ZONE_END(z1);
}
- if (entry->callback) {
- status =
- iree_status_join(status, entry->callback(status, entry->user_data));
- }
+ status =
+ iree_status_join(status, entry->callback(status, entry->user_data));
if (IREE_UNLIKELY(entry->created_event)) {
status = iree_status_join(
diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.h b/runtime/src/iree/hal/utils/deferred_work_queue.h
index c076a8d..3a25886 100644
--- a/runtime/src/iree/hal/utils/deferred_work_queue.h
+++ b/runtime/src/iree/hal/utils/deferred_work_queue.h
@@ -109,24 +109,6 @@
iree_status_t(IREE_API_PTR* submit_command_buffer)(
iree_hal_deferred_work_queue_device_interface_t* device_interface,
iree_hal_command_buffer_t* command_buffer);
-
- // Asynchronously allocates a pointer and assigns it to the given buffer.
- //
- // This is optional, and is only required to be valid if
- // iree_hal_deferred_work_queue_enqueue_alloc is ever called on the work
- // queue.
- iree_status_t(IREE_API_PTR* async_alloc)(
- iree_hal_deferred_work_queue_device_interface_t* device_interface,
- iree_hal_buffer_t* buffer);
-
- // Asynchronously frees a buffer.
- //
- // This is optional, and is only required to be valid if
- // iree_hal_deferred_work_queue_enqueue_dealloc is ever called on the work
- // queue.
- iree_status_t(IREE_API_PTR* async_dealloc)(
- iree_hal_deferred_work_queue_device_interface_t* device_interface,
- iree_hal_buffer_t* buffer);
} iree_hal_deferred_work_queue_device_interface_vtable_t;
iree_status_t iree_hal_deferred_work_queue_create(
@@ -152,22 +134,6 @@
iree_hal_command_buffer_t* const* command_buffers,
iree_hal_buffer_binding_table_t const* binding_tables);
-// Enqueues allocations into the work queue to be executed
-// once all semaphores have been satisfied.
-iree_status_t iree_hal_deferred_work_queue_enqueue_alloc(
- iree_hal_deferred_work_queue_t* deferred_work_queue,
- const iree_hal_semaphore_list_t wait_semaphore_list,
- const iree_hal_semaphore_list_t signal_semaphore_list,
- iree_hal_buffer_t* buffer);
-
-// Enqueues deallocations into the work queue to be executed
-// once all semaphores have been satisfied.
-iree_status_t iree_hal_deferred_work_queue_enqueue_dealloc(
- iree_hal_deferred_work_queue_t* deferred_work_queue,
- const iree_hal_semaphore_list_t wait_semaphore_list,
- const iree_hal_semaphore_list_t signal_semaphore_list,
- iree_hal_buffer_t* buffer);
-
// Attempts to advance the work queue by processing using
// the current thread, rather than the worker thread.
iree_status_t iree_hal_deferred_work_queue_issue(