Blame - tools/test/iree-run-module-multi.mlir - 3p/openxla/iree

blob: 3412596528180202b216192db692b7016c77c8f0 [file] [log] [blame]

Ben Vanik	f721fd0	2024-07-26 09:02:22 -0700	[diff] [blame]	1	// Tests that multiple devices are supported through iree-run-module by
				2	// providing two local thread pools. This is not optimal and not an intended
				3	// route for multi-device CPU workloads but requires no additional hardware
				4	// resources for the test and still verifies the compiler/runtime tooling
				5	// rendezvous of devices as specified on the command line.
				6
				7	// RUN: (iree-compile %s \
				8	// RUN: --iree-execution-model=async-external \
				9	// RUN: --iree-hal-target-device=device_a=local[0] \
				10	// RUN: --iree-hal-target-device=device_b=local[1] \
				11	// RUN: --iree-hal-local-target-device-backends=vmvx \| \
				12	// RUN: iree-run-module \
				13	// RUN: --module=- \
				14	// RUN: --function=mutli_device_mul \
				15	// RUN: --input=4xf32=10,11,12,13 \
				16	// RUN: --device=local-task \
				17	// RUN: --device=local-task \
				18	// RUN: --task_topology_group_count=1) \| \
				19	// RUN: FileCheck %s
				20
				21	// CHECK: EXEC @mutli_device_mul
				22	// CHECK-NEXT: result[0]: hal.buffer_view
				23	// CHECK-NEXT: 4xf32=0 55 144 273
				24	func.func public @mutli_device_mul(
				25	// Input argument is resident on device_a (the tooling defaults to the first device).
				26	%input_a: tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
				27	) -> (
				28	// Output result is expected to be on device_a (though this is not required).
				29	tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
				30	) {
				31	// Compute on device_a (the input is already there): multiply the input by a constant.
				32	%constant_a = arith.constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
				33	%transient_a = arith.mulf %input_a, %constant_a : tensor<4xf32>
				34	// Transfer the intermediate result from device_a -> device_b.
				35	%transient_b = flow.tensor.transfer %transient_a : tensor<4xf32> to #hal.device.promise<@device_b>
				36	// Compute on device_b: multiply the transferred value by a second constant.
				37	%constant_b = arith.constant dense<[4.0, 5.0, 6.0, 7.0]> : tensor<4xf32>
				38	%result_b = arith.mulf %transient_b, %constant_b : tensor<4xf32>
				39	// Transfer the final result from device_b -> device_a.
				40	%result_a = flow.tensor.transfer %result_b : tensor<4xf32> to #hal.device.promise<@device_a>
				41	// Return the result on device_a (matching the iree.abi.affinity attribute on the result).
				42	func.return %result_a : tensor<4xf32>
				43	}