// Tests that multiple devices are supported through iree-run-module by
// providing two local thread pools. This is neither optimal nor an intended
// route for multi-device CPU workloads, but it requires no additional hardware
// resources for the test and still verifies the compiler/runtime tooling
// rendezvous of devices as specified on the command line.
| 6 | |
// RUN: (iree-compile %s \
// RUN:   --iree-execution-model=async-external \
// RUN:   --iree-hal-target-device=device_a=local[0] \
// RUN:   --iree-hal-target-device=device_b=local[1] \
// RUN:   --iree-hal-local-target-device-backends=vmvx | \
// RUN:  iree-run-module \
// RUN:   --module=- \
// RUN:   --function=mutli_device_mul \
// RUN:   --input=4xf32=10,11,12,13 \
// RUN:   --device=local-task \
// RUN:   --device=local-task \
// RUN:   --task_topology_group_count=1) | \
// RUN: FileCheck %s
| 20 | |
// CHECK: EXEC @mutli_device_mul
// CHECK-NEXT: result[0]: hal.buffer_view
// CHECK-NEXT: 4xf32=0 55 144 273
// Multiplies a 4xf32 input elementwise by [0, 1, 2, 3] and then by
// [4, 5, 6, 7], moving the intermediate value from device_a to device_b and
// the final value back to device_a.
func.func public @mutli_device_mul(
  // The operand is promised to be resident on device_a (the tooling defaults
  // to the first device).
  %operand_on_a: tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
) -> (
  // The result is promised on device_a as well, though that is not required.
  tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
) {
  // Stage 1: scale on device_a, where the operand already lives.
  %scale_a = arith.constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
  %scaled_on_a = arith.mulf %operand_on_a, %scale_a : tensor<4xf32>

  // Hand the intermediate product over: device_a -> device_b.
  %scaled_on_b = flow.tensor.transfer %scaled_on_a : tensor<4xf32> to #hal.device.promise<@device_b>

  // Stage 2: scale again, this time on device_b.
  %scale_b = arith.constant dense<[4.0, 5.0, 6.0, 7.0]> : tensor<4xf32>
  %product_on_b = arith.mulf %scaled_on_b, %scale_b : tensor<4xf32>

  // Bring the final product back: device_b -> device_a, matching the affinity
  // declared on the result in the signature.
  %product_on_a = flow.tensor.transfer %product_on_b : tensor<4xf32> to #hal.device.promise<@device_a>
  func.return %product_on_a : tensor<4xf32>
}