samples/multiple_modules/pipeline_async.mlir - 3p/openxla/iree - Git at Google

 // RUN: (iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %p/module_a.mlir -o=%t.module_a.vmfb && \
 // RUN:  iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %p/module_b.mlir -o=%t.module_b.vmfb && \
 // RUN:  iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %s | \
 // RUN:  iree-run-module \
 // RUN:    --device=local-task \
 // RUN:    --module=%t.module_a.vmfb \
 // RUN:    --module=%t.module_b.vmfb \
 // RUN:    --module=- --function=run \
 // RUN:    --input=4096xf32=-2.0 \
 // RUN:    --expected_output=4096xf32=4.0) | \
 // RUN:  FileCheck %s
 // CHECK: [SUCCESS]

 // Functions declared in external modules - note `module_name.func_name`.
 // `abs` will allocate transient memory to pass back the result.
 // `mul` will use the provided output memory to produce the result in-place.
 // Note that though the returned SSA tensor value shares its storage with the
 // `%output` arg the returned value *must* be used to reference the produced
 // version of its contents.
 //
 // In this asynchronous example both functions follow the "coarse-fences" ABI
 // model where the compiler inserts a wait and signal fence pair on each call.
 // To enable this the modules must compiled with the
 // `--iree-execution-model=async-external` and the external declarations must
 // be annotated with the `iree.abi.model` attribute so that the compiler knows
 // the calls have the fences. Note that it's possible to have any combination of
 // asynchronous and synchronous modules and calls in the same program.
 func.func private @module_a.abs(%input: tensor<4096xf32>) -> tensor<4096xf32> attributes {
   iree.abi.model = "coarse-fences"
 }
 func.func private @module_b.mul(%lhs: tensor<4096xf32>, %rhs: tensor<4096xf32>, %output: tensor<4096xf32> {iree.abi.output = 0 : index}) -> tensor<4096xf32> attributes {
   iree.abi.model = "coarse-fences"
 }

 // Top-level pipeline invoked by the command line tool.
 // Since this is compiled with `--iree-execution-model=async-external` this
 // export will have a wait and signal fence pair that allows the hosting
 // application to execute the entire pipeline asynchronously.
 func.func @run(%input: tensor<4096xf32>) -> tensor<4096xf32> {
   // Make a simple call that produces a transient result tensor.
   // Since the call is asynchronous the result is not ready upon return to this
   // function and it'll be passed with the fence down to the consumer call.
   %input_abs = call @module_a.abs(%input) : (tensor<4096xf32>) -> tensor<4096xf32>

   // Allocate output storage for the next call. This isn't needed here and
   // functionally equivalent to `abs` above allocating its own transient memory
   // but demonstrates how in-place operations can be performed across module
   // boundaries. The allocation is asynchronous and will be passed with a fence
   // indicating when it's ready to the consumer call.
   %result_storage = tensor.empty() : tensor<4096xf32>

   // Make a call that produces its output in the given `%result_storage`.
   // The inputs and result storage are passed with their respective fences and
   // no guarantee that they are available at the time the call is made. The
   // `mul` implementation will chain its work with the fences and only signal
   // its fence when all transitive dependencies and its own execution has
   // completed.
   %result = call @module_b.mul(%input_abs, %input_abs, %result_storage) : (tensor<4096xf32>, tensor<4096xf32>, tensor<4096xf32>) -> tensor<4096xf32>

   // Return the final result value - note that we pass back the result of the
   // `mul` call that aliases the `%result_storage` representing the computed
   // value and not just `%result_storage`. This is required as the `%result` has
   // an associated fence indicating when it is available for use and using
   // `%result_storage` would just wait for the storage to be allocated and not
   // for the contents to have been populated by `mul`.
   return %result : tensor<4096xf32>
 }
	// RUN: (iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %p/module_a.mlir -o=%t.module_a.vmfb && \
	// RUN: iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %p/module_b.mlir -o=%t.module_b.vmfb && \
	// RUN: iree-compile --iree-execution-model=async-external --iree-hal-target-backends=vmvx %s \| \
	// RUN: iree-run-module \
	// RUN: --device=local-task \
	// RUN: --module=%t.module_a.vmfb \
	// RUN: --module=%t.module_b.vmfb \
	// RUN: --module=- --function=run \
	// RUN: --input=4096xf32=-2.0 \
	// RUN: --expected_output=4096xf32=4.0) \| \
	// RUN: FileCheck %s
	// CHECK: [SUCCESS]

	// Functions declared in external modules - note `module_name.func_name`.
	// `abs` will allocate transient memory to pass back the result.
	// `mul` will use the provided output memory to produce the result in-place.
	// Note that though the returned SSA tensor value shares its storage with the
	// `%output` arg the returned value must be used to reference the produced
	// version of its contents.
	//
	// In this asynchronous example both functions follow the "coarse-fences" ABI
	// model where the compiler inserts a wait and signal fence pair on each call.
	// To enable this the modules must compiled with the
	// `--iree-execution-model=async-external` and the external declarations must
	// be annotated with the `iree.abi.model` attribute so that the compiler knows
	// the calls have the fences. Note that it's possible to have any combination of
	// asynchronous and synchronous modules and calls in the same program.
	func.func private @module_a.abs(%input: tensor<4096xf32>) -> tensor<4096xf32> attributes {
	iree.abi.model = "coarse-fences"
	}
	func.func private @module_b.mul(%lhs: tensor<4096xf32>, %rhs: tensor<4096xf32>, %output: tensor<4096xf32> {iree.abi.output = 0 : index}) -> tensor<4096xf32> attributes {
	iree.abi.model = "coarse-fences"
	}

	// Top-level pipeline invoked by the command line tool.
	// Since this is compiled with `--iree-execution-model=async-external` this
	// export will have a wait and signal fence pair that allows the hosting
	// application to execute the entire pipeline asynchronously.
	func.func @run(%input: tensor<4096xf32>) -> tensor<4096xf32> {
	// Make a simple call that produces a transient result tensor.
	// Since the call is asynchronous the result is not ready upon return to this
	// function and it'll be passed with the fence down to the consumer call.
	%input_abs = call @module_a.abs(%input) : (tensor<4096xf32>) -> tensor<4096xf32>

	// Allocate output storage for the next call. This isn't needed here and
	// functionally equivalent to `abs` above allocating its own transient memory
	// but demonstrates how in-place operations can be performed across module
	// boundaries. The allocation is asynchronous and will be passed with a fence
	// indicating when it's ready to the consumer call.
	%result_storage = tensor.empty() : tensor<4096xf32>

	// Make a call that produces its output in the given `%result_storage`.
	// The inputs and result storage are passed with their respective fences and
	// no guarantee that they are available at the time the call is made. The
	// `mul` implementation will chain its work with the fences and only signal
	// its fence when all transitive dependencies and its own execution has
	// completed.
	%result = call @module_b.mul(%input_abs, %input_abs, %result_storage) : (tensor<4096xf32>, tensor<4096xf32>, tensor<4096xf32>) -> tensor<4096xf32>

	// Return the final result value - note that we pass back the result of the
	// `mul` call that aliases the `%result_storage` representing the computed
	// value and not just `%result_storage`. This is required as the `%result` has
	// an associated fence indicating when it is available for use and using
	// `%result_storage` would just wait for the storage to be allocated and not
	// for the contents to have been populated by `mul`.
	return %result : tensor<4096xf32>
	}