// Source IR for the following. Skips dispatch formation to isolate testing to
// codegen.
//
// !A_size = tensor<16x5xf32>
// !B_size = tensor<5x16xf32>
// !C_size = tensor<16x16xf32>
// !O_size = tensor<16xf32>
//
// module {
//   func.func @example_module(%A : !A_size, %B : !B_size, %C : !C_size) -> !O_size {
//     %0 = linalg.add ins(%A, %A : !A_size, !A_size)
//                     outs(%A : !A_size) -> !A_size
//     %1 = linalg.matmul ins(%0, %B : !A_size, !B_size)
//                        outs(%C : !C_size) -> !C_size
//     %empty = tensor.empty() : !O_size
//     %2 = linalg.reduce
//            ins(%1 : !C_size)
//            outs(%empty : !O_size)
//            dimensions = [1]
//            (%in: f32, %out: f32) {
//              %3 = arith.addf %out, %in: f32
//              linalg.yield %3: f32
//            }
//     return %2 : !O_size
//   }
// }

// Minimal SPIR-V target description used by every executable variant below.
#target = #iree_gpu.target<arch = "", features = "spirv:v1.3,cap:Shader", wgp = <
  compute = fp32|int32, storage = b32, subgroup = none, dot = none, mma = [], subgroup_size_choices = [64, 64],
  max_workgroup_sizes = [128, 128, 64], max_thread_count_per_workgroup = 128, max_workgroup_memory_bytes = 16384, max_workgroup_counts = [65535, 65535, 65535]>>

// Pipeline layouts for the three dispatches: elementwise add (in, out),
// matmul (lhs, rhs, acc), and reduction (in, out).
#pipeline_layout_0 = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer>
]>
#pipeline_layout_1 = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer>
]>
#pipeline_layout_2 = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer>
]>

module attributes {
  hal.device.targets = [
    #hal.device.target<"vulkan", [
      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
        iree.gpu.target = #target
      }>
    ]> : !hal.device
  ]
} {
  hal.executable private @example_module_dispatch_0 {
    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.gpu.target = #target}>) {
      hal.executable.export public @example_module_dispatch_0_generic_80_f32 ordinal(0) layout(#pipeline_layout_0) {
      ^bb0(%arg0: !hal.device):
        %x, %y, %z = flow.dispatch.workgroup_count_from_slice
        hal.return %x, %y, %z : index, index, index
      }
      builtin.module {
        func.func @example_module_dispatch_0_generic_80_f32() {
          %c0 = arith.constant 0 : index
          %0 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<80xf32>>
          %1 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<80xf32>>
          %2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [80], strides = [1] : !flow.dispatch.tensor<readonly:tensor<80xf32>> -> tensor<80xf32>
          %3 = tensor.empty() : tensor<80xf32>
          %4 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%2 : tensor<80xf32>) outs(%3 : tensor<80xf32>) {
          ^bb0(%in: f32, %out: f32):
            %5 = arith.addf %in, %in : f32
            linalg.yield %5 : f32
          } -> tensor<80xf32>
          flow.dispatch.tensor.store %4, %1, offsets = [0], sizes = [80], strides = [1] : tensor<80xf32> -> !flow.dispatch.tensor<writeonly:tensor<80xf32>>
          return
        }
      }
    }
  }
  hal.executable private @example_module_dispatch_1 {
    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.gpu.target = #target}>) {
      hal.executable.export public @example_module_dispatch_1_matmul_16x16x5_f32 ordinal(0) layout(#pipeline_layout_1) {
      ^bb0(%arg0: !hal.device):
        %x, %y, %z = flow.dispatch.workgroup_count_from_slice
        hal.return %x, %y, %z : index, index, index
      }
      builtin.module {
        func.func @example_module_dispatch_1_matmul_16x16x5_f32() {
          %c0 = arith.constant 0 : index
          %0 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x5xf32>>
          %1 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5x16xf32>>
          %2 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
          %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x5xf32>> -> tensor<16x5xf32>
          %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [5, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<5x16xf32>> -> tensor<5x16xf32>
          %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
          %6 = linalg.matmul ins(%3, %4 : tensor<16x5xf32>, tensor<5x16xf32>) outs(%5 : tensor<16x16xf32>) -> tensor<16x16xf32>
          flow.dispatch.tensor.store %6, %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
          return
        }
      }
    }
  }
  hal.executable private @example_module_dispatch_2 {
    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.gpu.target = #target}>) {
      hal.executable.export public @example_module_dispatch_2_generic_16x16_f32 ordinal(0) layout(#pipeline_layout_2) {
      ^bb0(%arg0: !hal.device):
        %x, %y, %z = flow.dispatch.workgroup_count_from_slice
        hal.return %x, %y, %z : index, index, index
      }
      builtin.module {
        func.func @example_module_dispatch_2_generic_16x16_f32() {
          %c0 = arith.constant 0 : index
          %0 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16xf32>>
          %1 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
          %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
          %3 = tensor.empty() : tensor<16xf32>
          %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor<16x16xf32>) outs(%3 : tensor<16xf32>) {
          ^bb0(%in: f32, %out: f32):
            %5 = arith.addf %out, %in : f32
            linalg.yield %5 : f32
          } -> tensor<16xf32>
          flow.dispatch.tensor.store %4, %1, offsets = [0], sizes = [16], strides = [1] : tensor<16xf32> -> !flow.dispatch.tensor<writeonly:tensor<16xf32>>
          return
        }
      }
    }
  }
}

/// We test first with threading off so that the printers are legible.
// RUN: iree-compile %s --iree-hal-target-backends=vulkan \
// RUN:   --iree-codegen-transform-dialect-library=%p/transform_library.mlir@kernel_config \
// RUN:   --compile-from=executable-sources \
// RUN:   --compile-to=executable-targets \
// RUN:   --mlir-disable-threading | \
// RUN: FileCheck %s --check-prefixes=CODEGEN-PRINTER

// CODEGEN-PRINTER: IR printer: Setting matmul strategy to custom_transform_strategy
// CODEGEN-PRINTER:   translation_info = #iree_codegen.translation_info<TransformDialectCodegen codegen_spec = @custom_transform_strategy>
// CODEGEN-PRINTER: IR printer: Setting reduce strategy to base vectorize top-level
// CODEGEN-PRINTER:   translation_info = #iree_codegen.translation_info<SPIRVBaseVectorize workgroup_size = [16, 1, 1]>

/// Then test with threading to make sure it runs
// RUN: iree-compile %s --iree-hal-target-backends=vulkan \
// RUN:   --iree-codegen-transform-dialect-library=%p/transform_library.mlir@kernel_config \
// RUN:   --compile-from=executable-sources \
// RUN:   --compile-to=executable-targets \
// RUN:   --mlir-disable-threading | \
// RUN: FileCheck %s --check-prefixes=CODEGEN

// CODEGEN: spirv.func @example_module_dispatch_0_generic_80_f32
// CODEGEN: spirv.func @example_module_dispatch_1_matmul_16x16x5_f32
// CODEGEN: spirv.func @example_module_dispatch_2_generic_16x16_f32