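Finally removing the hal.ex.submit_and_wait op 🎉 — the executable benchmark dump now queues the command buffer for execution and blocks on a fence (hal.fence.await) instead.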
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
index 08609db..93b3f14 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
@@ -17,8 +17,6 @@
RewritePatternSet &patterns) {
patterns.insert<VMImportOpConversion<IREE::HAL::ExSharedDeviceOp>>(
context, importSymbols, typeConverter, "hal.ex.shared_device");
- patterns.insert<VMImportOpConversion<IREE::HAL::ExSubmitAndWaitOp>>(
- context, importSymbols, typeConverter, "hal.ex.submit_and_wait");
}
} // namespace iree_compiler
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
index c2a0701..67fd07a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
@@ -45,15 +45,6 @@
];
}
-def HAL_ExSubmitAndWaitOp : HAL_Op<"ex.submit_and_wait", [Util_YieldPoint]> {
- let arguments = (ins
- HAL_Device:$device,
- HAL_CommandBuffer:$command_buffer
- );
-
- let assemblyFormat = "$device `,` $command_buffer attr-dict";
-}
-
//===----------------------------------------------------------------------===//
// Pseudo ops for conversion support
//===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
index 8136e4a..1c8cabb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
@@ -6,14 +6,3 @@
%device = hal.ex.shared_device : !hal.device
return %device : !hal.device
}
-
-// -----
-
-// CHECK-LABEL: @submit_and_wait
-func.func @submit_and_wait() {
- %0 = "test_hal.device"() : () -> !hal.device
- %1 = "test_hal.command_buffer"() : () -> !hal.command_buffer
- // CHECK: hal.ex.submit_and_wait %0, %1
- hal.ex.submit_and_wait %0, %1
- return
-}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
index 9c7fa98..65c72e3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
@@ -312,9 +312,27 @@
forBuilder.create<scf::YieldOp>(loc);
});
- // Submit command buffer.
funcBuilder.create<IREE::HAL::CommandBufferFinalizeOp>(loc, commandBuffer);
- funcBuilder.create<IREE::HAL::ExSubmitAndWaitOp>(loc, device, commandBuffer);
+
+ // We begin executing immediately and then wait on a fence.
+ // TODO(benvanik): add fences to ABI so the benchmark tool can pipeline.
+ Value waitFence = funcBuilder.create<IREE::Util::NullOp>(
+ loc, funcBuilder.getType<IREE::HAL::FenceType>());
+ Value signalFence = funcBuilder.create<IREE::HAL::TimelineAdvanceOp>(
+ loc, funcBuilder.getType<IREE::HAL::FenceType>());
+
+ // Queue execution.
+ auto queueAffinity = funcBuilder.create<arith::ConstantIntOp>(loc, -1, 64);
+ funcBuilder.create<IREE::HAL::DeviceQueueExecuteOp>(
+ loc, device, queueAffinity, waitFence, signalFence,
+ ValueRange{commandBuffer});
+
+ // Block until it completes.
+ Value timeoutMillis = funcBuilder.create<arith::ConstantIntOp>(loc, -1, 32);
+ auto fenceOp = funcBuilder.create<IREE::HAL::FenceAwaitOp>(
+ loc, funcBuilder.getI32Type(), timeoutMillis, signalFence);
+ funcBuilder.create<IREE::Util::StatusCheckOkOp>(
+ loc, fenceOp.getStatus(), "failed to wait on timepoint");
funcBuilder.create<mlir::func::ReturnOp>(loc);
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
index a862620..d80cdb3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
@@ -90,7 +90,7 @@
// Submit and wait for dispatches to complete:
// CHECK: hal.command_buffer.finalize<%[[CMD]] : !hal.command_buffer>
- // CHECK: hal.ex.submit_and_wait %{{.+}}, %[[CMD]]
+ // CHECK: hal.fence.await
// ===========================================================================
// @dispatch1 benchmark logic (note two deduplicated dispatches):
diff --git a/runtime/src/iree/modules/hal/exports.inl b/runtime/src/iree/modules/hal/exports.inl
index ebc7c43..988841f 100644
--- a/runtime/src/iree/modules/hal/exports.inl
+++ b/runtime/src/iree/modules/hal/exports.inl
@@ -68,7 +68,6 @@
EXPORT_FN("device.queue.flush", iree_hal_module_device_queue_flush, rI, v)
EXPORT_FN("ex.shared_device", iree_hal_module_ex_shared_device, v, r)
-EXPORT_FN("ex.submit_and_wait", iree_hal_module_ex_submit_and_wait, rr, v)
EXPORT_FN("executable.create", iree_hal_module_executable_create, rrrrCrD, r)
diff --git a/runtime/src/iree/modules/hal/module.c b/runtime/src/iree/modules/hal/module.c
index bcf6dfc..bc0fe89 100644
--- a/runtime/src/iree/modules/hal/module.c
+++ b/runtime/src/iree/modules/hal/module.c
@@ -58,10 +58,6 @@
// executables like ones for training vs inference in the same model, or just
// always use this.
iree_hal_executable_cache_t* executable_cache;
-
- // TODO(benvanik): remove with submit_and_wait.
- iree_hal_semaphore_t* submit_semaphore;
- uint64_t submit_value;
} iree_hal_module_state_t;
static void IREE_API_PTR iree_hal_module_destroy(void* base_module) {
@@ -91,11 +87,6 @@
state->shared_device, iree_string_view_empty(),
iree_loop_inline(&state->loop_status), &state->executable_cache));
- state->submit_value = 0ull;
- IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_hal_semaphore_create(state->shared_device, state->submit_value,
- &state->submit_semaphore));
-
*out_module_state = (iree_vm_module_state_t*)state;
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
@@ -106,7 +97,6 @@
IREE_TRACE_ZONE_BEGIN(z0);
iree_hal_module_state_t* state = (iree_hal_module_state_t*)module_state;
- iree_hal_semaphore_release(state->submit_semaphore);
iree_hal_executable_cache_release(state->executable_cache);
iree_status_ignore(state->loop_status);
iree_hal_device_release(state->shared_device);
@@ -140,40 +130,6 @@
return iree_ok_status();
}
-IREE_VM_ABI_EXPORT(iree_hal_module_ex_submit_and_wait, //
- iree_hal_module_state_t, //
- rr, v) {
- iree_hal_device_t* device = NULL;
- IREE_RETURN_IF_ERROR(iree_hal_device_check_deref(args->r0, &device));
- iree_hal_command_buffer_t* command_buffer = NULL;
- IREE_RETURN_IF_ERROR(
- iree_hal_command_buffer_check_deref(args->r1, &command_buffer));
-
- // Batch with our single command buffer.
- iree_hal_submission_batch_t batch;
- memset(&batch, 0, sizeof(batch));
-
- iree_hal_command_buffer_t* command_buffer_ptrs[] = {command_buffer};
- batch.command_buffer_count = IREE_ARRAYSIZE(command_buffer_ptrs);
- batch.command_buffers = command_buffer_ptrs;
-
- uint64_t next_semaphore_value = ++state->submit_value;
- iree_hal_semaphore_t* signal_semaphore_ptrs[] = {state->submit_semaphore};
- uint64_t signal_semaphore_values[] = {next_semaphore_value};
- batch.signal_semaphores.count = IREE_ARRAYSIZE(signal_semaphore_ptrs);
- batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
- batch.signal_semaphores.payload_values = signal_semaphore_values;
-
- iree_status_t status = iree_hal_device_queue_submit(
- device, IREE_HAL_COMMAND_CATEGORY_ANY, 0, 1, &batch);
- if (iree_status_is_ok(status)) {
- status = iree_hal_semaphore_wait(
- state->submit_semaphore, next_semaphore_value, iree_infinite_timeout());
- }
-
- return status;
-}
-
//===----------------------------------------------------------------------===//
// Utilities
//===----------------------------------------------------------------------===//