blob: 40de4d4896f2c7a7a18b5aa02440359231244a13 [file] [log] [blame] [edit]
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-stream-specialize-encodings)' --verify-diagnostics %s | FileCheck %s
//------------------------------------------------------------------------------
// IREE::CPU encoding layout specialization tests.
// These get serialized to the layout attributes.
//------------------------------------------------------------------------------
#map0 = affine_map<(m, n, k) -> (m, k)>
#map1 = affine_map<(m, n, k) -> (k, n)>
#map2 = affine_map<(m, n, k) -> (m, n)>
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_cpu.vmvx_encoding_layout<>}>
#executable_target_x86_64 = #hal.executable.target<"llvm-cpu", "xyz", {iree.encoding.resolver = #iree_cpu.cpu_encoding_layout<>, target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_x86_64]> : !hal.device
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map0, #map1, #map2]>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
util.func public @tensor_sizeof(%d0: index, %d1: index) -> (index, index) {
%size0 = stream.tensor.sizeof on(#hal.device.affinity<@device_a>) tensor<?x?xf32, #encoding>{%d0, %d1} : index
%size1 = stream.tensor.sizeof on(#hal.device.affinity<@device_b>) tensor<?x?xf32, #encoding>{%d0, %d1} : index
util.return %size0, %size1 : index, index
}
// CHECK: #[[$ENCODING0:.+]] = #iree_encoding.encoding
// CHECK-SAME: #iree_cpu.vmvx_encoding_layout
// CHECK-SAME: encoding_info = {innerDimsPos = [{{.+}}], innerTileSizes = [{{.+}}], outerDimsPerm = [{{.+}}]}
// CHECK: #[[$ENCODING1:.+]] = #iree_encoding.encoding
// CHECK-SAME: #iree_cpu.cpu_encoding_layout
// CHECK-SAME: encoding_info = {innerDimsPos = [{{.+}}], innerTileSizes = [{{.+}}], outerDimsPerm = [{{.+}}]}
// CHECK-LABEL: util.func public @tensor_sizeof
// CHECK: %[[D0_RES:.+]] = stream.tensor.sizeof {{.+}} tensor<?x?xf32, #[[$ENCODING0]]>
// CHECK: %[[D1_RES:.+]] = stream.tensor.sizeof {{.+}} tensor<?x?xf32, #[[$ENCODING1]]>
// CHECK: return %[[D0_RES]], %[[D1_RES]]
// -----
//------------------------------------------------------------------------------
// #iree_gpu.gpu_encoding_layout specialization tests.
// These get serialized to the layout attributes.
//------------------------------------------------------------------------------
#map0 = affine_map<(m, n, k) -> (m, k)>
#map1 = affine_map<(m, n, k) -> (k, n)>
#map2 = affine_map<(m, n, k) -> (m, n)>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb",
{
abi = "hip",
iree.encoding.resolver = #iree_gpu.gpu_encoding_layout<>,
iree.gpu.target = #iree_gpu.target<arch = "gfx942",
features = "",
wgp = <compute = fp32,
storage = b32,
subgroup = none,
dot = none,
mma = [<MFMA_F32_16x16x4_F32>],
subgroup_size_choices = [64],
max_workgroup_sizes = [1024, 1024, 1024],
max_thread_count_per_workgroup = 1024,
max_workgroup_memory_bytes = 65536,
max_workgroup_counts = [2147483647, 2147483647, 2147483647],
max_load_instruction_bits = 128,
simds_per_wgp = 4,
vgpr_space_bits = 16384>>
}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_rocm_hsaco_fb]> : !hal.device
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map0, #map1, #map2]>
util.global private @device_a = #device_target_local_0_
util.func public @gpu_with_encoding_layout(%d0: index, %d1: index) -> index {
%size0 = stream.tensor.sizeof on(#hal.device.affinity<@device_a>) tensor<?x?xf32, #encoding>{%d0, %d1} : index
util.return %size0 : index
}
// CHECK: #[[$ENCODING:.+]] = #iree_encoding.encoding
// CHECK-SAME: #iree_gpu.gpu_encoding_layout
// CHECK-SAME: encoding_info = {innerDimsPos = [{{.+}}], innerTileSizes = [{{.+}}], outerDimsPerm = [{{.+}}]}
// CHECK-LABEL: util.func public @gpu_with_encoding_layout
// CHECK: %[[RES:.+]] = stream.tensor.sizeof {{.+}} tensor<?x?xf32, #[[$ENCODING]]>
// CHECK: return %[[RES]]
// -----
//------------------------------------------------------------------------------
// iree_gpu.gpu_pad_encoding specialization tests.
// These get serialized to iree_encoding.pad_encoding_layout attributes.
//------------------------------------------------------------------------------
#map0 = affine_map<(m, n, k) -> (m, k)>
#map1 = affine_map<(m, n, k) -> (n, k)>
#map2 = affine_map<(m, n, k) -> (m, n)>
#map3 = affine_map<(m, n, k) -> (n, k)>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {abi = "hip",
iree.encoding.resolver = #iree_gpu.gpu_pad_layout<cache_line_bytes = 128, cache_sets = 4>, ukernels = "none"}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_rocm_hsaco_fb]> : !hal.device
#encodingA = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map0, #map1, #map2]>
#encodingB = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map0, #map1, #map2]>
#encodingC = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map0, #map1, #map2]>
#encodingD = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map0, #map3, #map2]>
util.global private @device_a = #device_target_local_0_
util.func public @with_pad_encoding(%arg0: index, %arg1: index, %scalar_f32 : f32) {
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4096xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%1 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4160xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%2 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x1337xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%3 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4095xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%4 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x250xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%5 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<60x4096xf16, #encodingA>{} in !stream.resource<*>{%arg1}
%6 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<1x4096xf16, #encodingB>{} in !stream.resource<*>{%arg1}
%7 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x4096xf16, #encodingA>{%arg0} in !stream.resource<*>{%arg1}
%8 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x?xf16, #encodingA>{%arg0, %arg1} in !stream.resource<*>{%arg1}
%9 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4096xf16, #encodingB>{} in !stream.resource<*>{%arg1}
%10 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4096xf16, #encodingC>{} in !stream.resource<*>{%arg1}
%11 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<4096x4096xf16, #encodingD>{} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$NO_PAD_LHS:.+]] = #iree_encoding.encoding<operand_index = 0 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 0]>]
// CHECK-DAG: #[[$NO_PAD_RHS:.+]] = #iree_encoding.encoding<operand_index = 1 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 0]>]
// CHECK-DAG: #[[$NO_PAD_OUT:.+]] = #iree_encoding.encoding<operand_index = 2 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 0]>]
// CHECK-DAG: #[[$PAD_LHS_0:.+]] = #iree_encoding.encoding<operand_index = 0 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 64]>]
// CHECK-DAG: #[[$PAD_LHS_1:.+]] = #iree_encoding.encoding<operand_index = 0 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 7]>]
// CHECK-DAG: #[[$PAD_LHS_2:.+]] = #iree_encoding.encoding<operand_index = 0 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 65]>]
// CHECK-DAG: #[[$PAD_RHS:.+]] = #iree_encoding.encoding<operand_index = 1 : index, {{.*}}, layouts = [#iree_encoding.pad_encoding_layout<[0, 64]>]
// CHECK-LABEL: util.func public @with_pad_encoding
//
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4096xf16, #[[$PAD_LHS_0]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4160xf16, #[[$NO_PAD_LHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x1337xf16, #[[$PAD_LHS_1]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4095xf16, #[[$PAD_LHS_2]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x250xf16, #[[$NO_PAD_LHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<60x4096xf16, #[[$NO_PAD_LHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<1x4096xf16, #[[$NO_PAD_RHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<?x4096xf16, #[[$PAD_LHS_0]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<?x?xf16, #[[$NO_PAD_LHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4096xf16, #[[$PAD_RHS]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4096xf16, #[[$NO_PAD_OUT]]>
// CHECK: stream.tensor.empty {{.*}} : tensor<4096x4096xf16, #[[$PAD_RHS]]>
//
// CHECK-NEXT: util.return
// -----
//------------------------------------------------------------------------------
// Stream ops that have TensorPhaseOp trait. This test suite tests that the
// encoding is updated that carries resolved layouts.
//------------------------------------------------------------------------------
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.func public @ops_with_result_encoding_only(%arg0: index, %arg1: index, %scalar_f32 : f32) {
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x0xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
%1 = stream.tensor.constant on(#hal.device.affinity<@device_a>) : tensor<?x5x64xf32>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
%2 = stream.tensor.splat on(#hal.device.affinity<@device_a>) %scalar_f32 : f32 -> tensor<?x1x10xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$ENCODING0:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x0xf32>>]>
// CHECK-DAG: #[[$ENCODING1:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x1x10xf32>>]>
// CHECK: #[[TARGET:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE:.+]] = #[[TARGET]]
// CHECK-LABEL: util.func public @ops_with_result_encoding_only
// CHECK: stream.tensor.empty on(#hal.device.affinity<@[[$DEVICE]]>) : tensor<?x0xf32, #[[$ENCODING0]]>
// CHECK: stream.tensor.constant {{.+}} : tensor<1x5x64xf32>
// CHECK: stream.tensor.splat on(#hal.device.affinity<@[[$DEVICE]]>) {{.+}} -> tensor<?x1x10xf32, #[[$ENCODING1]]>
// CHECK: return
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.func public @tensor_fill_op(%arg0: f32, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = stream.tensor.fill on(#hal.device.affinity<@device_a>)
%arg0, %arg1[%c0, %c0 for %c1, %c1] : f32
-> tensor<?x4xf32, #encoding>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
util.return
}
// CHECK-DAG: #[[$ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x4xf32>>]>
// CHECK: #[[TARGET:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE:.+]] = #[[TARGET]]
// CHECK-LABEL: util.func public @tensor_fill_op
// CHECK: stream.tensor.fill on(#hal.device.affinity<@[[$DEVICE]]>)
// CHECK-SAME: f32 -> tensor<?x4xf32, #[[$ENCODING]]>
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.func public @tensor_encode_op(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = stream.tensor.encode on(#hal.device.affinity<@device_a>)
%arg0 : tensor<?x?xf32>{%arg2, %arg3} in !stream.resource<*>{%arg1}
-> tensor<?x?xf32, #encoding>{%arg2, %arg3} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]>
// CHECK: #[[TARGET:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE:.+]] = #[[TARGET]]
// CHECK-LABEL: util.func public @tensor_encode_op
// CHECK: stream.tensor.encode on(#hal.device.affinity<@[[$DEVICE]]>)
// CHECK-SAME: -> tensor<?x?xf32, #[[$ENCODING]]>
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding0 = #iree_encoding.testing_encoding<>
#encoding1 = #iree_encoding.unknown_encoding
util.global private @device_a = #device_target_local_0_
util.func public @tensor_encode_op_change_encoding(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = stream.tensor.encode on(#hal.device.affinity<@device_a>)
%arg0 : tensor<?x?xf32, #encoding0>{%arg2, %arg3} in !stream.resource<*>{%arg1}
-> tensor<?x?xf32, #encoding1>{%arg2, %arg3} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$ENCODING0:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]>
// CHECK-DAG: #[[$ENCODING1:.+]] = #iree_encoding.unknown_encoding
// CHECK: #[[TARGET:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE:.+]] = #[[TARGET]]
// CHECK-LABEL: util.func public @tensor_encode_op_change_encoding
// CHECK: stream.tensor.encode on(#hal.device.affinity<@[[$DEVICE]]>)
// CHECK-SAME: : tensor<?x?xf32, #[[$ENCODING0]]>
// CHECK-SAME: -> tensor<?x?xf32, #[[$ENCODING1]]>
// -----
// Checks that the stream.tensor.constant op with unserialized encoding is not
// supported.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
// expected-error @+1 {{failed to add layouts to Stream::TensorPhaseOp with encodings}}
module {
util.global private @device_a = #device_target_local_0_
util.func public @tensor_constant_op_with_unserialized_encoding(%arg0: index) {
// expected-error @+1 {{failed to convert unserialized encoding to serialized encoding}}
%0 = stream.tensor.constant on(#hal.device.affinity<@device_a>) : tensor<?x5x64xf32, #encoding>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
util.return
}
}
// -----
#encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a : !hal.device
util.func public @tensor_constant_op_with_serialized_encoding(%arg0: index) {
%0 = stream.tensor.constant on(#hal.device.affinity<@device_a>) : tensor<?x5x64xf32, #encoding>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
util.return
}
// CHECK: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK-LABEL: util.func public @tensor_constant_op_with_serialized_encoding(
// CHECK: stream.tensor.constant
// CHECK-SAME: tensor<?x5x64xf32, #[[$SERIALIZED_ENCODING]]>
// -----
#encoding = #iree_encoding.unknown_encoding
util.global private @device_a : !hal.device
util.func public @tensor_constant_op_with_unknown_encoding(%arg0: index) {
%0 = stream.tensor.constant on(#hal.device.affinity<@device_a>) : tensor<?x5x64xf32, #encoding>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
util.return
}
// CHECK: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK-LABEL: util.func public @tensor_constant_op_with_unknown_encoding(
// CHECK: stream.tensor.constant
// CHECK-SAME: tensor<?x5x64xf32, #[[$UNKNOWN_ENCODING]]>
// -----
// Checks that the stream.tensor.clone op with unserialized encoding is not
// supported.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
// expected-error @+1 {{failed to add layouts to Stream::TensorPhaseOp with encodings}}
module {
util.global private @device_a = #device_target_local_0_
util.func public @tensor_clone_op_with_unserialized_encoding(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
// expected-error @+1 {{failed to convert unserialized encoding to serialized encoding}}
%0 = stream.tensor.clone on(#hal.device.affinity<@device_a>)
%arg0 : tensor<?x4xf32, #encoding>{%arg1} in !stream.resource<*>{%arg2}
-> tensor<?x4xf32, #encoding>{%arg1} in !stream.resource<*>{%arg2}
util.return
}
}
// -----
#unknown_encoding = #iree_encoding.unknown_encoding
util.global private @device_a : !hal.device
util.func public @tensor_clone_op_with_unknown_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
%0 = stream.tensor.clone on(#hal.device.affinity<@device_a>)
%arg0 : tensor<?x4xf32, #unknown_encoding>{%arg1} in !stream.resource<*>{%arg2}
-> tensor<?x4xf32, #unknown_encoding>{%arg1} in !stream.resource<*>{%arg2}
util.return
}
// CHECK-DAG: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK-LABEL: util.func public @tensor_clone_op_with_unknown_encodings(
// CHECK: stream.tensor.clone
// CHECK-SAME: tensor<?x4xf32, #[[$UNKNOWN_ENCODING]]>
// CHECK-SAME: tensor<?x4xf32, #[[$UNKNOWN_ENCODING]]>
// -----
#serialized_encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a : !hal.device
util.func public @tensor_clone_op_with_serialized_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
%0 = stream.tensor.clone on(#hal.device.affinity<@device_a>)
%arg0 : tensor<?x4xf32, #serialized_encoding>{%arg1} in !stream.resource<*>{%arg2}
-> tensor<?x4xf32, #serialized_encoding>{%arg1} in !stream.resource<*>{%arg2}
util.return
}
// CHECK-DAG: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK-LABEL: util.func public @tensor_clone_op_with_serialized_encodings(
// CHECK: stream.tensor.clone
// CHECK-SAME: tensor<?x4xf32, #[[$SERIALIZED_ENCODING]]>
// CHECK-SAME: tensor<?x4xf32, #[[$SERIALIZED_ENCODING]]>
// -----
// Checks that the stream.tensor.slice op with unserialized encoding is not
// supported.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
// expected-error @+1 {{failed to add layouts to Stream::TensorPhaseOp with encodings}}
module {
util.global private @device_a = #device_target_local_0_
util.func public @tensor_slice_op_with_encoding(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// expected-error @+1 {{failed to convert unserialized encoding to serialized encoding}}
%1 = stream.tensor.slice on(#hal.device.affinity<@device_a>)
%arg0[%c0, %c1 for %arg3, %c1] : tensor<?x4xf32, #encoding>{%arg1} in !stream.resource<*>{%arg2}
-> tensor<?x1xf32, #encoding>{%arg3} in !stream.resource<*>{%arg4}
util.return
}
}
// -----
#unknown_encoding = #iree_encoding.unknown_encoding
#serialized_encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a : !hal.device
util.func public @tensor_slice_op_with_unknown_or_serialized_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%1 = stream.tensor.slice on(#hal.device.affinity<@device_a>)
%arg0[%c0, %c1 for %arg3, %c1] : tensor<?x4xf32, #unknown_encoding>{%arg1} in !stream.resource<*>{%arg2}
-> tensor<?x1xf32, #serialized_encoding>{%arg3} in !stream.resource<*>{%arg4}
util.return
}
// CHECK: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK-LABEL: util.func public @tensor_slice_op_with_unknown_or_serialized_encodings(
// CHECK: stream.tensor.slice
// CHECK-SAME: tensor<?x4xf32, #[[$UNKNOWN_ENCODING]]>
// CHECK-SAME: tensor<?x1xf32, #[[$SERIALIZED_ENCODING]]>
// -----
// Checks that the stream.tensor.update op with unserialized encoding is not
// supported.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
// expected-error @+1 {{failed to add layouts to Stream::TensorPhaseOp with encodings}}
module {
util.global private @device_a = #device_target_local_0_
util.func public @tensor_update_op_with_unserialized_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// expected-error @+1 {{failed to convert unserialized encoding to serialized encoding}}
%0 = stream.tensor.update on(#hal.device.affinity<@device_a>)
%arg0, %arg2[%c0, %c0] : tensor<2x2xf32, #encoding> in !stream.resource<*>{%arg1}
-> tensor<?x4xf32, #encoding>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
util.return
}
}
// -----
#unknown_encoding = #iree_encoding.unknown_encoding
#serialized_encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a : !hal.device
util.func public @tensor_update_op_with_unknown_or_serialized_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = stream.tensor.update on(#hal.device.affinity<@device_a>)
%arg0, %arg2[%c0, %c0] : tensor<2x2xf32, #unknown_encoding> in !stream.resource<*>{%arg1}
-> tensor<?x4xf32, #serialized_encoding>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
util.return
}
// CHECK: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK-LABEL: util.func public @tensor_update_op_with_unknown_or_serialized_encodings(
// CHECK: stream.tensor.update
// CHECK-SAME: tensor<2x2xf32, #[[$UNKNOWN_ENCODING]]>
// CHECK-SAME: tensor<?x4xf32, #[[$SERIALIZED_ENCODING]]>
// -----
// Creates an identity encoding if encoding attribute is not available in the
// target configuration.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.func public @drop_encoding(%arg0: index, %arg1: index, %scalar_f32 : f32) {
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x0xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$IDENTITY_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.pad_encoding_layout<[0, 0]>]>
// CHECK-LABEL: util.func public @drop_encoding
// CHECK: stream.tensor.empty {{.+}} : tensor<?x0xf32, #[[$IDENTITY_ENCODING]]>
// -----
// Creates an identity encoding if iree_encoding.identity_encoding is used.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", { iree.encoding.resolver = #iree_encoding.identity_encoding }>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.func public @ignore_encoding_by_identity_encoding(%arg0: index, %arg1: index, %scalar_f32 : f32) {
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x0xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[$IDENTITY_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.pad_encoding_layout<[0, 0]>]>
// CHECK-LABEL: util.func public @ignore_encoding_by_identity_encoding
// CHECK: stream.tensor.empty {{.+}} : tensor<?x0xf32, #[[$IDENTITY_ENCODING]]>
// -----
// Do not update encodings if they are already serialized.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a = #device_target_local_0_
util.func public @keep_encoding_if_serialized(%arg0: index, %arg1: index, %scalar_f32 : f32) {
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x0xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
util.return
}
// CHECK: #[[$ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK-LABEL: util.func public @keep_encoding_if_serialized
// CHECK: stream.tensor.empty {{.+}} : tensor<?x0xf32, #[[$ENCODING]]>
// -----
// Check that a failure is signaled if we are not able to resolve a recognized
// encoding.
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", { iree.encoding.resolver = #iree_encoding.unsupported_encoding }>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
// expected-error @+1 {{failed to add layouts to Stream::TensorPhaseOp with encodings}}
module {
util.global private @device_a = #device_target_local_0_
util.func public @fail_to_get_recognized_layout(%arg0: index, %arg1: index, %scalar_f32 : f32) {
// expected-error @+2 {{failed to resolve recognized layout}}
// expected-error @+1 {{failed to convert unserialized encoding to serialized encoding}}
%0 = stream.tensor.empty on(#hal.device.affinity<@device_a>) : tensor<?x0xf32, #encoding>{%arg0} in !stream.resource<*>{%arg1}
util.return
}
}
// -----
//------------------------------------------------------------------------------
// This test suite tests the executable duplication logic.
//------------------------------------------------------------------------------
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
stream.executable private @executable {
stream.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !stream.binding, %arg1: index) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:tensor<16xf32, #encoding>>
return
}
}
}
util.func public @tensor_dispatch_with_tied_operands(%arg0: !stream.resource<external>, %arg1: index, %arg2: index, %arg3: index) -> !stream.resource<*> {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg2}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @executable::@dispatch(%0, %arg3) : (tensor<4x?xf32, #encoding>{%arg2} in !stream.resource<*>{%arg1}, index) -> tensor<4x?xf32, #encoding>{%arg2} in %0{%arg1}
util.return %1 : !stream.resource<*>
}
// CHECK-DAG: #[[$ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<4x?xf32>>]>
// CHECK: #[[TARGET:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE:.+]] = #[[TARGET]]
// CHECK-LABEL: util.func public @tensor_dispatch_with_tied_operands
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE]]>)
// CHECK-SAME: tensor<4x?xf32, #[[$ENCODING]]>{%[[ARG2]]}
// CHECK-SAME: tensor<4x?xf32, #[[$ENCODING]]>{%[[ARG2]]}
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_vmvx_bytecode_fb]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
stream.executable private @ex {
stream.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding) {
%c0 = arith.constant 0 : index
%1 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<16xf32, #encoding>>
%2 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<16xf32, #encoding>>
return
}
}
}
util.func public @multi_device_with_same_executable_targets(%arg0: !stream.resource<external>, %arg1: index) {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg1} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg1}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@dispatch(%0) : (tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}) -> tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}
%2 = stream.async.transfer %1 : !stream.resource<*>{%arg1} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%arg1}
%3 = stream.tensor.dispatch on(#hal.device.affinity<@device_b>) @ex::@dispatch(%2) : (tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}) -> tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[DEVICE_LOCAL_0:.+]] = #hal.device.target
// CHECK-DAG: #[[DEVICE_LOCAL_1:.+]] = #hal.device.target
// CHECK-DAG: #[[$ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<16xf32>>]>
// CHECK: util.global private @[[$DEVICE_A:.+]] = #[[DEVICE_LOCAL_0]]
// CHECK: util.global private @[[$DEVICE_B:.+]] = #[[DEVICE_LOCAL_1]]
// CHECK: stream.executable private @[[$EX0:.+]] {
// CHECK: stream.binding.subspan{{.+}}#[[$ENCODING]]
// CHECK: stream.binding.subspan{{.+}}#[[$ENCODING]]
// CHECK-NOT: stream.executable private
// CHECK-LABEL: util.func public @multi_device_with_same_executable_targets
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_A]]>) @[[$EX0]]::@dispatch
// CHECK-SAME: #[[$ENCODING]]
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_B]]>) @[[$EX0]]::@dispatch
// CHECK-SAME: #[[$ENCODING]]
// -----
// Tests that launch the executable on device_a, pass the result to device_b and
// launch it on device_b. Thus, the incoming layout of second tensor dispatch op
// has device_a layout, and it produces device_b layout.
#executable_target_a = #hal.executable.target<"target_a", "abc", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#executable_target_b = #hal.executable.target<"target_b", "xyz", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<456>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_a]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_b]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
stream.executable private @ex {
stream.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding) {
%c0 = arith.constant 0 : index
%1 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<16xf32, #encoding>>
%2 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<16xf32, #encoding>>
return
}
}
}
util.func public @multi_device_with_different_executable_targets(%arg0: !stream.resource<external>, %arg1: index) {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg1} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg1}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@dispatch(%0) : (tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}) -> tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}
%2 = stream.async.transfer %1 : !stream.resource<*>{%arg1} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%arg1}
%3 = stream.tensor.dispatch on(#hal.device.affinity<@device_b>) @ex::@dispatch(%2) : (tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}) -> tensor<16xf32, #encoding> in !stream.resource<*>{%arg1}
util.return
}
// CHECK-DAG: #[[DEVICE_LOCAL_0:.+]] = #hal.device.target
// CHECK-DAG: #[[DEVICE_LOCAL_1:.+]] = #hal.device.target
// CHECK-DAG: #[[$DEVICE_A_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<16xf32>>]>
// CHECK-DAG: #[[$DEVICE_B_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<456, tensor<16xf32>>]>
// CHECK: util.global private @[[$DEVICE_A:.+]] = #[[DEVICE_LOCAL_0]]
// CHECK: util.global private @[[$DEVICE_B:.+]] = #[[DEVICE_LOCAL_1]]
// CHECK: stream.executable private @[[$EX0:.+]] {
// CHECK: stream.binding.subspan{{.+}}#[[$DEVICE_A_ENCODING]]
// CHECK: stream.binding.subspan{{.+}}#[[$DEVICE_A_ENCODING]]
// CHECK: stream.executable private @[[$EX1:.+]] {
// CHECK: stream.binding.subspan{{.+}}#[[$DEVICE_A_ENCODING]]
// CHECK: stream.binding.subspan{{.+}}#[[$DEVICE_B_ENCODING]]
// CHECK-LABEL: util.func public @multi_device_with_different_executable_targets
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_A]]>) @[[$EX0]]::@dispatch
// CHECK-SAME: #[[$DEVICE_A_ENCODING]]
// CHECK-SAME: #[[$DEVICE_A_ENCODING]]
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_B]]>) @[[$EX1]]::@dispatch
// CHECK-SAME: #[[$DEVICE_A_ENCODING]]
// CHECK-SAME: #[[$DEVICE_B_ENCODING]]
// -----
// This tests the set_encoding, where the destination tensor type is encoded.
// The program has two external stream.resource. It imports transfer one to
// the device_a and the other to the device_b. Then it launches the set_encoding
// executable on both devices. We check that the executable is duplicated and
// the encodings on bindings are updated.
#executable_target_a = #hal.executable.target<"target_a", "abc", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#executable_target_b = #hal.executable.target<"target_b", "xyz", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<456>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_a]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_b]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
stream.executable private @ex {
stream.executable.export public @set_encoding
builtin.module {
func.func @set_encoding(%arg0: !stream.binding, %arg1: index, %arg2: index, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = flow.dispatch.workload.ordinal %arg1, 0 : index
%1 = flow.dispatch.workload.ordinal %arg2, 1 : index
%2 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #encoding>>{%0, %1}
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%5 = iree_encoding.set_encoding %4 : tensor<?x?xf32> -> tensor<?x?xf32, #encoding>
flow.dispatch.tensor.store %5, %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor<?x?xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #encoding>>{%0, %1}
return
}
}
}
util.func public @multi_device_set_encoding(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>, %arg2: index, %N : index, %K : index) {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg2}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@set_encoding(%0, %N, %K) : (tensor<?x?xf32>{%N, %K} in !stream.resource<*>{%arg2}, index, index) -> (tensor<?x?xf32, #encoding>{%N, %K} in !stream.resource<*>{%arg2})
%2 = stream.async.transfer %arg1 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_b>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%arg2}
%3 = stream.tensor.dispatch on(#hal.device.affinity<@device_b>) @ex::@set_encoding(%2, %N, %K) : (tensor<?x?xf32>{%N, %K} in !stream.resource<*>{%arg2}, index, index) -> (tensor<?x?xf32, #encoding>{%N, %K} in !stream.resource<*>{%arg2})
util.return
}
// CHECK-DAG: #[[DEVICE_A_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]>
// CHECK-DAG: #[[DEVICE_B_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<456, tensor<?x?xf32>>]>
// CHECK-DAG: #[[ORIG_ENCODING:.+]] = #iree_encoding.testing_encoding<>
// CHECK-DAG: #[[DEVICE_LOCAL_0:.+]] = #hal.device.target
// CHECK-DAG: #[[DEVICE_LOCAL_1:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE_A:.+]] = #[[DEVICE_LOCAL_0]]
// CHECK: util.global private @[[$DEVICE_B:.+]] = #[[DEVICE_LOCAL_1]]
// CHECK: stream.executable private @[[$EX0:.+]] {
// CHECK: func.func @set_encoding(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK: %[[SRC_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>
// CHECK: %[[DEST_BINDING:.+]] = stream.binding.subspan %[[ARG3]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #[[DEVICE_A_ENCODING]]>
// CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]]
// CHECK: %[[SET_ENCODING:.+]] = iree_encoding.set_encoding %[[SRC]]
// CHECK-SAME: tensor<?x?xf32> -> tensor<?x?xf32, #[[ORIG_ENCODING]]>
// CHECK: flow.dispatch.tensor.store %[[SET_ENCODING]], %[[DEST_BINDING]]
// CHECK: stream.executable private @[[$EX1:.+]] {
// CHECK: func.func @set_encoding(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK: %[[SRC_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>
// CHECK: %[[DEST_BINDING:.+]] = stream.binding.subspan %[[ARG3]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #[[DEVICE_B_ENCODING]]>
// CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]]
// CHECK: %[[SET_ENCODING:.+]] = iree_encoding.set_encoding %[[SRC]]
// CHECK-SAME: tensor<?x?xf32> -> tensor<?x?xf32, #[[ORIG_ENCODING]]>
// CHECK: flow.dispatch.tensor.store %[[SET_ENCODING]], %[[DEST_BINDING]]
// CHECK-LABEL: util.func public @multi_device_set_encoding
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_A]]>) @[[$EX0]]::@set_encoding
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_B]]>) @[[$EX1]]::@set_encoding
// -----
// This test is simliar to the set_encoding test, but with unset_encoding ops.
#executable_target_a = #hal.executable.target<"target_a", "abc", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#executable_target_b = #hal.executable.target<"target_b", "xyz", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<456>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_a]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_b]> : !hal.device
#encoding = #iree_encoding.testing_encoding<>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
stream.executable private @ex {
stream.executable.export public @unset_encoding
builtin.module {
func.func @unset_encoding(%arg0: !stream.binding, %arg1: index, %arg2: index, %arg3: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = flow.dispatch.workload.ordinal %arg1, 0 : index
%1 = flow.dispatch.workload.ordinal %arg2, 1 : index
%2 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding>>{%0, %1}
%3 = stream.binding.subspan %arg3[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding>>{%0, %1} -> tensor<?x?xf32, #encoding>
%5 = iree_encoding.unset_encoding %4 : tensor<?x?xf32, #encoding> -> tensor<?x?xf32>{%0, %1}
flow.dispatch.tensor.store %5, %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
return
}
}
}
util.func public @multi_device_unset_encoding(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>, %arg2: index, %M: index, %N: index) {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg2}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @ex::@unset_encoding(%0, %M, %N) : (tensor<?x?xf32, #encoding>{%M, %N} in !stream.resource<*>{%arg2}, index, index) -> (tensor<?x?xf32>{%M, %N} in !stream.resource<*>{%arg2})
%2 = stream.async.transfer %arg1 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_b>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%arg2}
%3 = stream.tensor.dispatch on(#hal.device.affinity<@device_b>) @ex::@unset_encoding(%2, %M, %N) : (tensor<?x?xf32, #encoding>{%M, %N} in !stream.resource<*>{%arg2}, index, index) -> (tensor<?x?xf32>{%M, %N} in !stream.resource<*>{%arg2})
util.return
}
// CHECK-DAG: #[[DEVICE_A_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]>
// CHECK-DAG: #[[DEVICE_B_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<456, tensor<?x?xf32>>]>
// CHECK-DAG: #[[ORIG_ENCODING:.+]] = #iree_encoding.testing_encoding<>
// CHECK-DAG: #[[DEVICE_LOCAL_0:.+]] = #hal.device.target
// CHECK-DAG: #[[DEVICE_LOCAL_1:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE_A:.+]] = #[[DEVICE_LOCAL_0]]
// CHECK: util.global private @[[$DEVICE_B:.+]] = #[[DEVICE_LOCAL_1]]
// CHECK: stream.executable private @[[$EX0:.+]] {
// CHECK: func.func @unset_encoding(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK: %[[SRC_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_ENCODING]]>>
// CHECK: %[[DEST_BINDING:.+]] = stream.binding.subspan %[[ARG3]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>
// CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_ENCODING]]>
// CHECK: %[[UNSET_ENCODING:.+]] = iree_encoding.unset_encoding %[[SRC]]
// CHECK-SAME: tensor<?x?xf32, #[[ORIG_ENCODING]]> -> tensor<?x?xf32>
// CHECK: flow.dispatch.tensor.store %[[UNSET_ENCODING]], %[[DEST_BINDING]]
// CHECK: stream.executable private @[[$EX1:.+]] {
// CHECK: func.func @unset_encoding(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK: %[[SRC_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_ENCODING]]>>
// CHECK: %[[DEST_BINDING:.+]] = stream.binding.subspan %[[ARG3]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>
// CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_ENCODING]]>
// CHECK: %[[UNSET_ENCODING:.+]] = iree_encoding.unset_encoding %[[SRC]]
// CHECK-SAME: tensor<?x?xf32, #[[ORIG_ENCODING]]> -> tensor<?x?xf32>
// CHECK: flow.dispatch.tensor.store %[[UNSET_ENCODING]], %[[DEST_BINDING]]
// CHECK-LABEL: util.func public @multi_device_unset_encoding
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_A]]>) @[[$EX0]]::@unset_encoding
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_B]]>) @[[$EX1]]::@unset_encoding
// -----
// This tests the computation ops on tensor encodings, where all the tensor
// types are encoded. The computation body is fill + matmul.
#executable_target_a = #hal.executable.target<"target_a", "abc", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#executable_target_b = #hal.executable.target<"target_b", "xyz", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<456>}>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_a]> : !hal.device
#device_target_local_1_ = #hal.device.target<"local", {ordinal = 1 : index}, [#executable_target_b]> : !hal.device
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2]>
#encoding1 = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2]>
#encoding2 = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2]>
util.global private @device_a = #device_target_local_0_
util.global private @device_b = #device_target_local_1_
stream.executable private @ex {
stream.executable.export public @gemm
builtin.module {
func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: !stream.binding) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = flow.dispatch.workload.ordinal %arg2, 0 : index
%1 = flow.dispatch.workload.ordinal %arg3, 1 : index
%2 = flow.dispatch.workload.ordinal %arg4, 2 : index
%3 = flow.dispatch.workload.ordinal %arg5, 3 : index
%4 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding>>{%2, %0}
%5 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding1>>{%1, %3}
%6 = stream.binding.subspan %arg6[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #encoding2>>{%2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %0], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding>>{%2, %0} -> tensor<?x?xf32, #encoding>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%1, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding1>>{%1, %3} -> tensor<?x?xf32, #encoding1>
%9 = tensor.empty(%2, %3) : tensor<?x?xf32, #encoding2>
%10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<?x?xf32, #encoding2>) -> tensor<?x?xf32, #encoding2>
%11 = linalg.matmul ins(%7, %8 : tensor<?x?xf32, #encoding>, tensor<?x?xf32, #encoding1>) outs(%10 : tensor<?x?xf32, #encoding2>) -> tensor<?x?xf32, #encoding2>
flow.dispatch.tensor.store %11, %6, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : tensor<?x?xf32, #encoding2> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #encoding2>>{%2, %3}
return
}
}
}
util.func public @multi_device_gemm(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>, %arg2: !stream.resource<external>, %arg3: !stream.resource<external>, %M: index, %N: index, %K: index) {
%MK = arith.muli %M, %K : index
%NK = arith.muli %N, %K : index
%MN = arith.muli %M, %N : index
%LHS_A = stream.async.transfer %arg0 : !stream.resource<external>{%MK} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%MK}
%RHS_A = stream.async.transfer %arg1 : !stream.resource<external>{%NK} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%NK}
%RES_A = stream.tensor.dispatch on(#hal.device.affinity<@device_a>)
@ex::@gemm(%LHS_A, %RHS_A, %K, %K, %M, %N)
: (tensor<?x?xf32, #encoding>{%M, %K} in !stream.resource<*>{%MK}, tensor<?x?xf32, #encoding1>{%N, %K} in !stream.resource<*>{%NK}, index, index, index, index)
-> (tensor<?x?xf32, #encoding2>{%M, %N} in !stream.resource<*>{%MN})
%barrier_0 = util.optimization_barrier %RES_A : !stream.resource<*>
%LHS_B = stream.async.transfer %arg2 : !stream.resource<external>{%MK} from(#hal.device.affinity<@device_b>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%MK}
%RHS_B = stream.async.transfer %arg3 : !stream.resource<external>{%NK} from(#hal.device.affinity<@device_b>) -> to(#hal.device.affinity<@device_b>) !stream.resource<*>{%NK}
%RES_B = stream.tensor.dispatch on(#hal.device.affinity<@device_b>)
@ex::@gemm(%LHS_B, %RHS_B, %K, %K, %M, %N)
: (tensor<?x?xf32, #encoding>{%M, %K} in !stream.resource<*>{%MK}, tensor<?x?xf32, #encoding1>{%N, %K} in !stream.resource<*>{%NK}, index, index, index, index)
-> (tensor<?x?xf32, #encoding2>{%M, %N} in !stream.resource<*>{%MN})
%barrier_1 = util.optimization_barrier %RES_B : !stream.resource<*>
util.return
}
// CHECK-DAG: #[[DEVICE_A_LHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 0{{.+}} layouts = [#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]
// CHECK-DAG: #[[DEVICE_A_RHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 1{{.+}} layouts = [#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]
// CHECK-DAG: #[[DEVICE_A_OUT_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 2{{.+}} layouts = [#iree_encoding.specialized_encoding<123, tensor<?x?xf32>>]
// CHECK-DAG: #[[DEVICE_B_LHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 0{{.+}} layouts = [#iree_encoding.specialized_encoding<456, tensor<?x?xf32>>]
// CHECK-DAG: #[[DEVICE_B_RHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 1{{.+}} layouts = [#iree_encoding.specialized_encoding<456, tensor<?x?xf32>>]
// CHECK-DAG: #[[DEVICE_B_OUT_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 2{{.+}} layouts = [#iree_encoding.specialized_encoding<456, tensor<?x?xf32>>]
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK-DAG: #[[ORIG_LHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 0{{.+}} user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>
// CHECK-DAG: #[[ORIG_RHS_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 1{{.+}} user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>
// CHECK-DAG: #[[ORIG_OUT_ENCODING:.+]] = #iree_encoding.encoding<operand_index = 2{{.+}} user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>
// CHECK-DAG: #[[DEVICE_LOCAL_0:.+]] = #hal.device.target
// CHECK-DAG: #[[DEVICE_LOCAL_1:.+]] = #hal.device.target
// CHECK: util.global private @[[$DEVICE_A:.+]] = #[[DEVICE_LOCAL_0]]
// CHECK: util.global private @[[$DEVICE_B:.+]] = #[[DEVICE_LOCAL_1]]
// CHECK: stream.executable private @[[$EX0:.+]] {
// CHECK: func.func @gemm(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]
// CHECK: %[[LHS_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_LHS_ENCODING]]>>
// CHECK: %[[RHS_BINDING:.+]] = stream.binding.subspan %[[ARG1]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_RHS_ENCODING]]>>
// CHECK: %[[OUT_BINDING:.+]] = stream.binding.subspan %[[ARG6]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #[[DEVICE_A_OUT_ENCODING]]>>
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_LHS_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_LHS_ENCODING]]>
// CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_A_RHS_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_RHS_ENCODING]]>
// CHECK: %[[INIT:.+]] = tensor.empty({{.+}}) : tensor<?x?xf32, #[[ORIG_OUT_ENCODING]]>
// CHECK: %[[FILL:.+]] = linalg.fill ins({{.+}}) outs(%[[INIT]]
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS]], %[[RHS]]
// CHECK-SAME: outs(%[[FILL]]
// CHECK: flow.dispatch.tensor.store %[[MATMUL]], %[[OUT_BINDING]]
// CHECK: stream.executable private @[[$EX1:.+]] {
// CHECK: func.func @gemm(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]
// CHECK: %[[LHS_BINDING:.+]] = stream.binding.subspan %[[ARG0]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_LHS_ENCODING]]>>
// CHECK: %[[RHS_BINDING:.+]] = stream.binding.subspan %[[ARG1]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_RHS_ENCODING]]>>
// CHECK: %[[OUT_BINDING:.+]] = stream.binding.subspan %[[ARG6]]
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #[[DEVICE_B_OUT_ENCODING]]>>
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_LHS_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_LHS_ENCODING]]>
// CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #[[DEVICE_B_RHS_ENCODING]]>>
// CHECK-SAME: -> tensor<?x?xf32, #[[ORIG_RHS_ENCODING]]>
// CHECK: %[[INIT:.+]] = tensor.empty({{.+}}) : tensor<?x?xf32, #[[ORIG_OUT_ENCODING]]>
// CHECK: %[[FILL:.+]] = linalg.fill ins({{.+}}) outs(%[[INIT]]
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS]], %[[RHS]]
// CHECK-SAME: outs(%[[FILL]]
// CHECK: flow.dispatch.tensor.store %[[MATMUL]], %[[OUT_BINDING]]
// CHECK-LABEL: util.func public @multi_device_gemm
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_A]]>) @[[$EX0]]::@gemm
// CHECK: stream.tensor.dispatch on(#hal.device.affinity<@[[$DEVICE_B]]>) @[[$EX1]]::@gemm
// -----
// A test for unknown encodings and already serialized encodings. It does
// nothing if the encoding is not recognized. It updates the subspan binding, if
// the encoding is already serialized.
#unknown_encoding = #iree_encoding.unknown_encoding
#serialized_encoding = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
util.global private @device_a : !hal.device
stream.executable private @executable {
stream.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !stream.binding, %arg1: index, %arg2: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4x?xf32>>{%arg1}
%1 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>{%arg1}
return
}
}
}
util.func public @tensor_dispatch_with_unknown_and_serialized_encodings(%arg0: !stream.resource<external>, %arg1: index, %arg2: index, %arg3: index) -> !stream.resource<*> {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg2}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @executable::@dispatch(%0, %arg3) : (tensor<4x?xf32, #unknown_encoding>{%arg2} in !stream.resource<*>{%arg1}, index) -> tensor<4x?xf32, #serialized_encoding>{%arg2} in !stream.resource<*>{%arg1}
util.return %1 : !stream.resource<*>
}
// CHECK-DAG: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK-DAG: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123>]>
// CHECK: stream.executable
// CHECK: func.func
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-DAG: stream.binding.subspan %[[ARG0]]{{.+}} !flow.dispatch.tensor<readonly:tensor<4x?xf32>>
// CHECK-DAG: stream.binding.subspan %[[ARG2]]{{.+}} !flow.dispatch.tensor<writeonly:tensor<4x?xf32, #[[$SERIALIZED_ENCODING]]>>
// CHECK-LABEL: util.func public @tensor_dispatch_with_unknown_and_serialized_encodings(
// CHECK: stream.tensor.dispatch
// CHECK: tensor<4x?xf32, #[[$UNKNOWN_ENCODING]]>
// CHECK: tensor<4x?xf32, #[[$SERIALIZED_ENCODING]]>
// -----
// Test that the unserialized encoding is serialized, and the unknown encoding
// is the same.
#unknown_encoding = #iree_encoding.unknown_encoding
#encoding = #iree_encoding.testing_encoding<>
#executable_target_a = #hal.executable.target<"target_a", "abc", {iree.encoding.resolver = #iree_encoding.unspecialized_encoding<123>}>
#device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_a]> : !hal.device
util.global private @device_a = #device_target_local_0_
stream.executable private @executable {
stream.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !stream.binding, %arg1: index, %arg2: !stream.binding) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4x?xf32>>{%arg1}
%1 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>{%arg1}
return
}
}
}
util.func public @tensor_dispatch_with_unknown_and_unserialized_encodings(%arg0: !stream.resource<external>, %arg1: index, %arg2: index, %arg3: index) -> !stream.resource<*> {
%0 = stream.async.transfer %arg0 : !stream.resource<external>{%arg2} from(#hal.device.affinity<@device_a>) -> to(#hal.device.affinity<@device_a>) !stream.resource<*>{%arg2}
%1 = stream.tensor.dispatch on(#hal.device.affinity<@device_a>) @executable::@dispatch(%0, %arg3) : (tensor<4x?xf32, #unknown_encoding>{%arg2} in !stream.resource<*>{%arg1}, index) -> tensor<4x?xf32, #encoding>{%arg2} in !stream.resource<*>{%arg1}
util.return %1 : !stream.resource<*>
}
// CHECK-DAG: #[[$UNKNOWN_ENCODING:.+]] = #iree_encoding.unknown_encoding
// CHECK-DAG: #[[$SERIALIZED_ENCODING:.+]] = #iree_encoding.testing_encoding<[#iree_encoding.specialized_encoding<123, tensor<4x?xf32>>]>
// CHECK: stream.executable
// CHECK: func.func
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
// CHECK-DAG: stream.binding.subspan %[[ARG0]]{{.+}} !flow.dispatch.tensor<readonly:tensor<4x?xf32>>
// CHECK-DAG: stream.binding.subspan %[[ARG2]]{{.+}} !flow.dispatch.tensor<writeonly:tensor<4x?xf32, #[[$SERIALIZED_ENCODING]]>>
// CHECK-LABEL: util.func public @tensor_dispatch_with_unknown_and_unserialized_encodings(
// CHECK: stream.tensor.dispatch
// CHECK: tensor<4x?xf32, #[[$UNKNOWN_ENCODING]]>
// CHECK: tensor<4x?xf32, #[[$SERIALIZED_ENCODING]]>
// -----
//------------------------------------------------------------------------------
// Negative tests. The pass should do nothing for the cases.
//------------------------------------------------------------------------------
// It does not fail because there are no encodings on stream.tensor.dispatch
// ops.
hal.executable.source public @executable {
hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout<constants = 0, bindings = [
#hal.pipeline.binding<storage_buffer>
]>)
}
util.func public @dispatch_hal_executable(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> !stream.resource<*> {
%0 = stream.tensor.dispatch @executable::@dispatch(%arg0) : (tensor<4x?xf32>{%arg2} in !stream.resource<*>{%arg1}) -> tensor<4x?xf32>{%arg2} in !stream.resource<*>{%arg1}
util.return %0 : !stream.resource<*>
}
// CHECK-LABEL: util.func public @dispatch_hal_executable(
// -----
// It does not fail because the executable does not match the requirements.
#encoding = #iree_encoding.unknown_encoding
util.global private @device : !hal.device
hal.executable.source public @executable {
hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout<constants = 0, bindings = [
#hal.pipeline.binding<storage_buffer>
]>)
}
util.func public @dispatch_hal_executable_with_encodings(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> !stream.resource<*> {
%0 = stream.tensor.dispatch on(#hal.device.affinity<@device>) @executable::@dispatch(%arg0) : (tensor<4x?xf32, #encoding>{%arg2} in !stream.resource<*>{%arg1}) -> tensor<4x?xf32, #encoding>{%arg2} in !stream.resource<*>{%arg1}
util.return %0 : !stream.resource<*>
}
// CHECK-LABEL: util.func public @dispatch_hal_executable_with_encodings(