| // RUN: iree-compile %s --iree-execution-model=async-external --iree-hal-target-backends=llvm-cpu | custom-module-async-run - example.main | FileCheck %s |
| |
| module @example { |
| //===--------------------------------------------------------------------===// |
| // Imports |
| //===--------------------------------------------------------------------===// |
| // Declarations of the external functions that the custom module's C++ file |
| // implements. Each symbol carries the `custom.` module-name prefix. |
| |
| // Tensor-in/tensor-out import that is invoked asynchronously. |
| // IREE hands the call two HAL fences: one indicating when the input tensor is |
| // available, and one the call can use to indicate when the tensor it returns |
| // is available. The call is expected not to block. |
| // |
| // `nosideeffects` is critical to ensuring asynchronous execution. If it is |
| // left off, IREE still passes in both fences but waits on the signal fence |
| // after the call completes before continuing. Leaving it off may be required |
| // when returning custom types or synchronizing with external systems. |
| func.func private @custom.call.async(tensor<?xi32>) -> tensor<?xi32> |
| attributes {iree.abi.model = "coarse-fences", nosideeffects} |
| |
| //===--------------------------------------------------------------------===// |
| // Sample methods |
| //===--------------------------------------------------------------------===// |
| // Any number of methods may be exported publicly; this sample exports just |
| // one to stay minimal. |
| |
| func.func @main(%arg0: tensor<?xi32>) -> tensor<?xi32> { |
| // Dispatch work generated by the compiler; present to show dataflow. |
| %squared = arith.muli %arg0, %arg0 : tensor<?xi32> |
| |
| // Call into the asynchronous import. |
| // The runtime chains together the async work that produces %squared and |
| // makes the call with a wait fence indicating when %squared is ready. The |
| // call *should* return immediately with a newly allocated but not yet |
| // populated %imported. The runtime then keeps chaining the %result work |
| // below, pending the signal from the call that %imported is ready for use. |
| // |
| // Allocations are generally blocking unless they are performed with the |
| // queue-ordered allocation APIs, which chain on to fences. |
| %imported = func.call @custom.call.async(%squared) : (tensor<?xi32>) -> tensor<?xi32> |
| |
| // Further generated dispatch work, again to show dataflow. |
| %result = arith.muli %imported, %imported : tensor<?xi32> |
| |
| func.return %result : tensor<?xi32> |
| } |
| |
| // TODO(benvanik): fix wait-before-signal on queue-ordered allocations. |
| // Until that is fixed we have to signal to T=1 before invoking the function; |
| // this is only a temporary workaround. |
| // CHECK: INITIALIZE T=0 |
| // CHECK: SIGNALED T=1 |
| // CHECK: VM INVOKE BEGIN example.main |
| // CHECK: VM INVOKE END |
| // CHECK: REACHED T=2 |
| // CHECK: MATCHED! |
| } |