blob: 245e2870ed282df9c8c9e7cda91294a856ac4ec6 [file] [log] [blame]
// RUN: iree-compile %s --iree-execution-model=async-external --iree-hal-target-backends=llvm-cpu | custom-module-async-run - example.main | FileCheck %s
module @example {
//===--------------------------------------------------------------------===//
// Imports
//===--------------------------------------------------------------------===//
// External function declarations for the methods implemented in the custom
// module C++ file. Note that they are prefixed with the `custom.` module
// name.
// Asynchronous call that takes/returns a tensor.
// IREE will pass in a HAL fence indicating when the input tensor is available
// and a HAL fence that the call can use to indicate when the returned tensor
// is available. It's expected that the call will not block.
//
// Note that `nosideeffects` is critical to ensuring asynchronous execution.
// When omitted IREE will still pass in the fences but wait on the signal
// fence after the call completes before continuing. This may be required when
// returning custom types or synchronizing with external systems.
// Declaration of the import; the implementation lives in the custom module
// registered at runtime. `iree.abi.model = "coarse-fences"` opts this call
// into the fence-based async ABI described above (wait fence + signal fence
// appended to the marshaled arguments), and `nosideeffects` lets the compiler
// continue scheduling work without waiting on the signal fence after return.
func.func private @custom.call.async(tensor<?xi32>) -> tensor<?xi32> attributes {
  iree.abi.model = "coarse-fences",
  nosideeffects
}
//===--------------------------------------------------------------------===//
// Sample methods
//===--------------------------------------------------------------------===//
// Note that there can be any number of publicly-exported methods; this
// sample has just one to keep it minimal.
// Publicly-exported entry point demonstrating dataflow through an async
// import: dispatch -> custom call -> dispatch, all chained on fences.
func.func @main(%arg0: tensor<?xi32>) -> tensor<?xi32> {
  // Compiler-generated dispatch: square the input so the import has pending
  // device work it must wait on before executing.
  %squared = arith.muli %arg0, %arg0 : tensor<?xi32>
  // Asynchronous custom call. The runtime passes a wait fence that is
  // signaled once %squared is ready and a signal fence the import signals
  // when its result tensor is populated, so the call itself should return
  // immediately with a freshly allocated (but not yet filled) tensor.
  //
  // Allocations here are generally blocking unless made through the
  // queue-ordered allocation APIs that chain on fences.
  %called = call @custom.call.async(%squared) : (tensor<?xi32>) -> tensor<?xi32>
  // A second compiler-generated dispatch, chained on the import's signal
  // fence, consuming the asynchronously produced result.
  %result = arith.muli %called, %called : tensor<?xi32>
  return %result : tensor<?xi32>
}
// TODO(benvanik): fix wait-before-signal on queue-ordered allocations.
// For now we have to signal to T=1 before invoking the function but that's
// only temporary.
// CHECK: INITIALIZE T=0
// CHECK: SIGNALED T=1
// CHECK: VM INVOKE BEGIN example.main
// CHECK: VM INVOKE END
// CHECK: REACHED T=2
// CHECK: MATCHED!
}