// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu | custom-module-sync-run - example.main | FileCheck %s

module @example {
  //===--------------------------------------------------------------------===//
  // Imports
  //===--------------------------------------------------------------------===//
  // External function declarations for the methods implemented in the custom
  // module C++ file. Note that they are prefixed with the `custom.` module
  // name.

  // Synchronous call that takes/returns a tensor.
  // IREE will block and wait until the input tensor is available, make the
  // import call, and assume that the returned tensor is immediately available
  // for use.
  func.func private @custom.call.sync(tensor<?xi32>) -> tensor<?xi32>

  //===--------------------------------------------------------------------===//
  // Sample methods
  //===--------------------------------------------------------------------===//
  // Note that there can be any number of publicly-exported methods; this
  // sample has just one to keep things simple.

  // Squares the input elementwise, passes the result through the synchronous
  // custom import, and squares whatever the import returns.
  //
  // CHECK-LABEL: INVOKE BEGIN example.main
  func.func @main(%arg0: tensor<?xi32>) -> tensor<?xi32> {
    // Compiler-generated dispatch work to show dataflow.
    %0 = arith.muli %arg0, %arg0 : tensor<?xi32>

    // Custom call to a synchronous import.
    // The runtime will block and wait until %0 is ready before making the call
    // and assume it can immediately start using the resulting %1 after the
    // call returns. Note that the top-level invocation will block while this
    // call is made, and if we were running the compiler-generated dispatches
    // above/below on a GPU it would fully synchronize the host and device
    // (really bad!).
    %1 = call @custom.call.sync(%0) : (tensor<?xi32>) -> tensor<?xi32>

    // More generated dispatch work to show dataflow.
    %2 = arith.muli %1, %1 : tensor<?xi32>

    // CHECK: MATCHED!
    return %2 : tensor<?xi32>
  }
  // CHECK-NEXT: INVOKE END
}