| // RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu | custom-module-sync-run - example.main | FileCheck %s | 
 |  | 
 | module @example { | 
 |   //===--------------------------------------------------------------------===// | 
 |   // Imports | 
 |   //===--------------------------------------------------------------------===// | 
 |   // Declarations of the external functions whose implementations live in the | 
 |   // custom module's C++ source file. Each symbol carries the `custom.` module | 
 |   // name as its prefix. | 
 |  | 
 |   // Synchronous import that accepts a tensor and produces a tensor. | 
 |   // IREE blocks until the input tensor is available, performs the import | 
 |   // call, and then treats the returned tensor as ready for immediate use. | 
 |   func.func private @custom.call.sync(tensor<?xi32>) -> tensor<?xi32> | 
 |  | 
 |   //===--------------------------------------------------------------------===// | 
 |   // Sample methods | 
 |   //===--------------------------------------------------------------------===// | 
 |   // Any number of methods may be exported publicly; this sample exports a | 
 |   // single one for brevity. | 
 |  | 
 |   // CHECK-LABEL: INVOKE BEGIN example.main | 
 |   func.func @main(%arg0: tensor<?xi32>) -> tensor<?xi32> { | 
 |     // Compiler-generated dispatch work ahead of the import, to show dataflow. | 
 |     %squared = arith.muli %arg0, %arg0 : tensor<?xi32> | 
 |  | 
 |     // Call into the synchronous import. | 
 |     // The runtime waits for %squared to be ready before issuing the call and | 
 |     // assumes the produced %imported is usable as soon as the call returns. | 
 |     // The top-level invocation is blocked for the duration of the call; if | 
 |     // the compiler-generated dispatches on either side were running on a GPU | 
 |     // this would fully synchronize the host and device (really bad!). | 
 |     %imported = call @custom.call.sync(%squared) : (tensor<?xi32>) -> tensor<?xi32> | 
 |  | 
 |     // Additional generated dispatch work that consumes the import's result. | 
 |     %result = arith.muli %imported, %imported : tensor<?xi32> | 
 |  | 
 |     // CHECK: MATCHED! | 
 |     return %result : tensor<?xi32> | 
 |   } | 
 |   // CHECK-NEXT: INVOKE END | 
 | } | 