Add a test for iree-run-module with multiple devices.
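
The test compiles for two logical devices (device_a and device_b) backed by the
same local VMVX target and then runs the module with two local-task devices.
A sketch of the equivalent manual invocation, mirroring the RUN lines in the
test (the module is read from stdin via --module=-):

  iree-compile tools/test/iree-run-module-multi.mlir \
      --iree-execution-model=async-external \
      --iree-hal-target-device=device_a=local[0] \
      --iree-hal-target-device=device_b=local[1] \
      --iree-hal-local-target-device-backends=vmvx | \
  iree-run-module \
      --module=- \
      --function=multi_device_mul \
      --input=4xf32=10,11,12,13 \
      --device=local-task \
      --device=local-task \
      --task_topology_group_count=1
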
diff --git a/tools/test/BUILD.bazel b/tools/test/BUILD.bazel
index cf5878c..46709aa 100644
--- a/tools/test/BUILD.bazel
+++ b/tools/test/BUILD.bazel
@@ -31,6 +31,7 @@
"iree-run-mlir.mlir",
"iree-run-module-expected.mlir",
"iree-run-module-inputs.mlir",
+ "iree-run-module-multi.mlir",
"iree-run-module-outputs.mlir",
"iree-run-module.mlir",
"multiple_args.mlir",
diff --git a/tools/test/CMakeLists.txt b/tools/test/CMakeLists.txt
index 75dde66..a866548 100644
--- a/tools/test/CMakeLists.txt
+++ b/tools/test/CMakeLists.txt
@@ -27,6 +27,7 @@
"iree-run-mlir.mlir"
"iree-run-module-expected.mlir"
"iree-run-module-inputs.mlir"
+ "iree-run-module-multi.mlir"
"iree-run-module-outputs.mlir"
"iree-run-module.mlir"
"multiple_args.mlir"
diff --git a/tools/test/iree-run-module-multi.mlir b/tools/test/iree-run-module-multi.mlir
new file mode 100644
index 0000000..3412596
--- /dev/null
+++ b/tools/test/iree-run-module-multi.mlir
@@ -0,0 +1,43 @@
+// Tests that iree-run-module supports multiple devices by providing two
+// local thread pools. This is not optimal and not an intended configuration
+// for multi-device CPU workloads, but it requires no additional hardware
+// for the test while still exercising how the compiler and runtime tooling
+// rendezvous on the devices specified on the command line.
+
+// RUN: (iree-compile %s \
+// RUN: --iree-execution-model=async-external \
+// RUN: --iree-hal-target-device=device_a=local[0] \
+// RUN: --iree-hal-target-device=device_b=local[1] \
+// RUN: --iree-hal-local-target-device-backends=vmvx | \
+// RUN: iree-run-module \
+// RUN: --module=- \
+// RUN: --function=multi_device_mul \
+// RUN: --input=4xf32=10,11,12,13 \
+// RUN: --device=local-task \
+// RUN: --device=local-task \
+// RUN: --task_topology_group_count=1) | \
+// RUN: FileCheck %s
+
+// CHECK: EXEC @multi_device_mul
+// CHECK-NEXT: result[0]: hal.buffer_view
+// CHECK-NEXT: 4xf32=0 55 144 273
+func.func public @multi_device_mul(
+  // Input argument is resident on device_a (tooling defaults to the first device).
+ %input_a: tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
+) -> (
+ // Output result is expected to be on device_a (though not required).
+ tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
+) {
+ // Compute on device_a (input is there).
+ %constant_a = arith.constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
+ %transient_a = arith.mulf %input_a, %constant_a : tensor<4xf32>
+ // Transfer the result from device_a -> device_b.
+ %transient_b = flow.tensor.transfer %transient_a : tensor<4xf32> to #hal.device.promise<@device_b>
+ // Compute on device_b.
+ %constant_b = arith.constant dense<[4.0, 5.0, 6.0, 7.0]> : tensor<4xf32>
+ %result_b = arith.mulf %transient_b, %constant_b : tensor<4xf32>
+ // Transfer the result from device_b -> device_a.
+ %result_a = flow.tensor.transfer %result_b : tensor<4xf32> to #hal.device.promise<@device_a>
+ // Return the result on device_a (as required by ABI attr).
+ func.return %result_a : tensor<4xf32>
+}
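
The expected values in the CHECK line follow from the elementwise math in the
test: [10, 11, 12, 13] * [0, 1, 2, 3] = [0, 11, 24, 39] on device_a, then
[0, 11, 24, 39] * [4, 5, 6, 7] = [0, 55, 144, 273] after the transfer to
device_b, which is the 4xf32=0 55 144 273 that FileCheck matches.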