[spirv] Port more tests over to dynamic pipelines (#6749)
With dynamic pipelines, the lowering configuration is no longer supplied via pass flags; instead we attach it directly onto the ops themselves — `lowering.config` attributes on the tiled Linalg ops and `workgroup_size`/`translation.info` attributes on the `hal.executable.entry_point` ops.
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
index 186a6c3..05cc9fd 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
@@ -544,6 +544,10 @@
RewritePatternSet canoncalizationPatterns(context);
populateAffineMinSCFCanonicalizationPattern(canoncalizationPatterns);
SmallVector<int64_t> workgroupSize = getWorkgroupSize(entryPointOp);
+ if (workgroupSize.empty()) {
+ entryPointOp.emitError("expected to have workgroup_size attribute");
+ return signalPassFailure();
+ }
auto getThreadRangeFn = [workgroupSize](Value processorValue,
SmallVectorImpl<Value> &dims,
SmallVectorImpl<Value> &symbols) {
diff --git a/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir b/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
index a7d78af..5c73cec 100644
--- a/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
@@ -1,133 +1,187 @@
-// RUN: iree-opt -split-input-file -iree-convert-to-spirv %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv)))' %s | IreeFileCheck %s
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
- // CHECK-LABEL: spv.module
- // CHECK: spv.GlobalVariable @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
- // CHECK: spv.func @push_constant()
- func @push_constant() {
- // CHECK: %[[INDEX_0:.+]] = spv.Constant 0 : i32
- // CHECK: %[[INDEX_1:.+]] = spv.Constant 2 : i32
- // CHECK: %[[ADDR:.+]] = spv.mlir.addressof @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
- // CHECK: %[[AC:.+]] = spv.AccessChain %[[ADDR]][%[[INDEX_0]], %[[INDEX_1]]] : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
- // CHECK: spv.Load "PushConstant" %[[AC]] : i32
- %0 = hal.interface.load.constant offset = 2 : index
- return
- }
-
+hal.executable @push_constant attributes {sym_visibility = "private"} {
hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @push_constant attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+ // CHECK-LABEL: spv.module
+ // CHECK: spv.GlobalVariable @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+ // CHECK: spv.func @push_constant()
+ func @push_constant() {
+ // CHECK: %[[INDEX_0:.+]] = spv.Constant 0 : i32
+ // CHECK: %[[INDEX_1:.+]] = spv.Constant 2 : i32
+ // CHECK: %[[ADDR:.+]] = spv.mlir.addressof @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+ // CHECK: %[[AC:.+]] = spv.AccessChain %[[ADDR]][%[[INDEX_0]], %[[INDEX_1]]] : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+ // CHECK: spv.Load "PushConstant" %[[AC]] : i32
+ %0 = hal.interface.load.constant offset = 2 : index
+ return
+ }
+
+ hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write"
+ }
+ }
+ }
}
// -----
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
- // CHECK-LABEL: spv.module
- // CHECK: spv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spv.func @resource_bindings_in_same_entry_func()
- func @resource_bindings_in_same_entry_func() {
- %c0 = constant 0 : index
-
- // Same type
- // CHECK: spv.mlir.addressof @[[ARG0]]
- // CHECK: spv.mlir.addressof @[[ARG0]]
- %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
- %1 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
-
- // Different type
- // CHECK: spv.mlir.addressof @[[ARG1_0]]
- // CHECK: spv.mlir.addressof @[[ARG1_1]]
- %2 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4x4xf32>
- %3 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4xvector<4xf32>>
-
- // CHECK: spv.mlir.addressof @[[RET0]]
- %4 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32>
-
- %5 = memref.load %0[%c0, %c0] : memref<4x4xf32>
- %6 = memref.load %1[%c0, %c0] : memref<4x4xf32>
-
- %7 = memref.load %2[%c0, %c0] : memref<4x4xf32>
- %8 = memref.load %3[%c0] : memref<4xvector<4xf32>>
-
- %9 = memref.load %4[%c0, %c0] : memref<4x4xf32>
-
- return
- }
-
+hal.executable @resource_bindings_in_same_func attributes {sym_visibility = "private"} {
hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=1, binding=3, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @resource_bindings_in_same_func attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+ // CHECK-LABEL: spv.module
+ // CHECK: spv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spv.func @resource_bindings_in_same_entry_func()
+ func @resource_bindings_in_same_entry_func() {
+ %c0 = constant 0 : index
+
+ // Same type
+ // CHECK: spv.mlir.addressof @[[ARG0]]
+ // CHECK: spv.mlir.addressof @[[ARG0]]
+ %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+ %1 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+
+ // Different type
+ // CHECK: spv.mlir.addressof @[[ARG1_0]]
+ // CHECK: spv.mlir.addressof @[[ARG1_1]]
+ %2 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4x4xf32>
+ %3 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4xvector<4xf32>>
+
+ // CHECK: spv.mlir.addressof @[[RET0]]
+ %4 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32>
+
+ %5 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+ %6 = memref.load %1[%c0, %c0] : memref<4x4xf32>
+
+ %7 = memref.load %2[%c0, %c0] : memref<4x4xf32>
+ %8 = memref.load %3[%c0] : memref<4xvector<4xf32>>
+
+ %9 = memref.load %4[%c0, %c0] : memref<4x4xf32>
+
+ return
+ }
+
+ hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=1, binding=3, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
+ }
+ }
+ }
}
// -----
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
- // CHECK-LABEL: spv.module
- // CHECK: spv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-
- // CHECK: spv.func @resource_bindings_in_entry_func1()
- func @resource_bindings_in_entry_func1() {
- // CHECK: spv.mlir.addressof @[[FUNC1_ARG]]
- // CHECK: spv.mlir.addressof @[[FUNC1_RET]]
- %c0 = constant 0 : index
- %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
- %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4xvector<4xf32>>
-
- %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
- %3 = memref.load %1[%c0] : memref<4xvector<4xf32>>
-
- return
- }
-
- // CHECK: spv.func @resource_bindings_in_entry_func2()
- func @resource_bindings_in_entry_func2() {
- // CHECK: spv.mlir.addressof @[[FUNC2_ARG]]
- // CHECK: spv.mlir.addressof @[[FUNC2_RET]]
- %c0 = constant 0 : index
- %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32> // Same type as previous function
- %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32> // Different type as previous function
-
- %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
- %3 = memref.load %1[%c0, %c0] : memref<4x4xf32>
-
- return
- }
-
+hal.executable @resource_bindings_in_multi_entry_func attributes {sym_visibility = "private"} {
hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @resource_bindings_in_entry_func1 attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ hal.executable.entry_point @resource_bindings_in_entry_func2 attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+ // CHECK-LABEL: spv.module
+ // CHECK: spv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+
+ // CHECK: spv.func @resource_bindings_in_entry_func1()
+ func @resource_bindings_in_entry_func1() {
+ // CHECK: spv.mlir.addressof @[[FUNC1_ARG]]
+ // CHECK: spv.mlir.addressof @[[FUNC1_RET]]
+ %c0 = constant 0 : index
+ %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+ %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4xvector<4xf32>>
+
+ %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+ %3 = memref.load %1[%c0] : memref<4xvector<4xf32>>
+
+ return
+ }
+
+ // CHECK: spv.func @resource_bindings_in_entry_func2()
+ func @resource_bindings_in_entry_func2() {
+ // CHECK: spv.mlir.addressof @[[FUNC2_ARG]]
+ // CHECK: spv.mlir.addressof @[[FUNC2_RET]]
+ %c0 = constant 0 : index
+ %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32> // Same type as previous function
+ %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32> // Different type as previous function
+
+ %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+ %3 = memref.load %1[%c0, %c0] : memref<4x4xf32>
+
+ return
+ }
+
+ hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
+ }
+ }
+ }
}
// -----
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
- func @interface_binding() {
- %c0 = constant 0 : index
- %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<8x5xf32>
- %1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<5xf32>
- %2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<8x5xf32>
-
- %3 = memref.load %0[%c0, %c0] : memref<8x5xf32>
- %4 = memref.load %1[%c0] : memref<5xf32>
- %5 = memref.load %2[%c0, %c0] : memref<8x5xf32>
-
- return
- }
+hal.executable @interface_binding attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @interface_binding attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
+ func @interface_binding() {
+ %c0 = constant 0 : index
+ %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<8x5xf32>
+ %1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<5xf32>
+ %2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<8x5xf32>
+
+ %3 = memref.load %0[%c0, %c0] : memref<8x5xf32>
+ %4 = memref.load %1[%c0] : memref<5xf32>
+ %5 = memref.load %2[%c0, %c0] : memref<8x5xf32>
+
+ return
+ }
+ hal.interface @io attributes {sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+ }
+ }
+ }
}
// Explicitly check the variable symbols
@@ -143,18 +197,32 @@
// -----
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
- func @interface_wg_id() {
- %0 = hal.interface.workgroup.id[0] : index
- %1 = hal.interface.workgroup.id[1] : index
- return
- }
+hal.executable @interface_wg_id attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @interface_wg_id attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
+ func @interface_wg_id() {
+ %0 = hal.interface.workgroup.id[0] : index
+ %1 = hal.interface.workgroup.id[1] : index
+ return
+ }
+ hal.interface @io attributes {sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+ }
+ }
+ }
}
+
// CHECK-LABEL: spv.module
// CHECK-DAG: spv.GlobalVariable @[[WGID:.+]] built_in("WorkgroupId")
// CHECK: spv.func
@@ -167,17 +235,30 @@
// -----
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
- func @interface_wg_count() {
- %0 = hal.interface.workgroup.count[0] : index
- %1 = hal.interface.workgroup.count[1] : index
- return
- }
+hal.executable @interface_wg_count attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
+ hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+ hal.executable.entry_point @interface_wg_count attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 1: index, 1: index]
+ }
+ module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>} {
+ func @interface_wg_count() {
+ %0 = hal.interface.workgroup.count[0] : index
+ %1 = hal.interface.workgroup.count[1] : index
+ return
+ }
+ hal.interface @io attributes {sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+ }
+ }
+ }
}
// CHECK-LABEL: spv.module
// CHECK-DAG: spv.GlobalVariable @[[WGCOUNT:.+]] built_in("NumWorkgroups")
diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index 936962e..1b4bfe3 100644
--- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-hlo-to-spirv-pipeline))' -iree-spirv-workgroup-tile-size=8,64,4 -iree-spirv-invocation-tile-size=8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-linalg-to-spirv-pipeline))' %s | IreeFileCheck %s
+
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
hal.executable @fuse_and_vectorize_fill_matmul attributes {sym_visibility = "private"} {
hal.interface @io {
@@ -8,8 +10,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @fuse_and_vectorize_fill_matmul attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @fuse_and_vectorize_fill_matmul() {
@@ -40,8 +43,8 @@
%13 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg0)[%workgroup_size_y]
%14 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg1)[%workgroup_size_x]
%15 = linalg.init_tensor [%13, %14] : tensor<?x?xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
- %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %10 : tensor<?x4096xf32>, tensor<4096x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %16 = linalg.fill(%cst, %15) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %10 : tensor<?x4096xf32>, tensor<4096x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:4096x4096xf32>
}
}
@@ -66,6 +69,8 @@
// -----
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
+
hal.executable @fuse_and_vectorize_matmul_add attributes {sym_visibility = "private"} {
hal.interface @io {
hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -74,8 +79,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @fuse_and_vectorize_matmul_add attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @fuse_and_vectorize_matmul_add() {
@@ -112,9 +118,9 @@
%18 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg0)[%workgroup_size_y]
%19 = affine.min affine_map<(d0)[s0] -> (-d0 + 256, s0)>(%arg1)[%workgroup_size_x]
%20 = linalg.init_tensor [%18, %19] : tensor<?x?xf32>
- %21 = linalg.fill(%cst, %20) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
- %22 = linalg.matmul ins(%15, %17 : tensor<?x512xf32>, tensor<512x?xf32>) outs(%21 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%13 : tensor<?x?xf32>) attrs = {__internal_linalg_transform__ = "workgroup"} {
+ %21 = linalg.fill(%cst, %20) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %22 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%15, %17 : tensor<?x512xf32>, tensor<512x?xf32>) outs(%21 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%13 : tensor<?x?xf32>) attrs = {__internal_linalg_transform__ = "workgroup", lowering.config = #config} {
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): // no predecessors
%24 = addf %arg2, %arg3 : f32
linalg.yield %24 : f32
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
index 52a6cac..1ce42bd 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-tile-and-vectorize,canonicalize,cse))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-vectorize,canonicalize,cse))))' %s | IreeFileCheck %s
#map0 = affine_map<()[s0] -> (s0 * 8)>
#map1 = affine_map<()[s0, s1] -> (8, s1 - s0 * 8)>
@@ -8,6 +8,8 @@
#map5 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1)>
+#config = {tileSizes = [[8, 16, 0], [], [1, 1, 1]]}
+
hal.executable @matmul attributes {sym_visibility = "private"} {
hal.interface @io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -15,7 +17,11 @@
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
- hal.executable.entry_point @matmul attributes {interface = @io, ordinal = 0 : index}
+ hal.executable.entry_point @matmul attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 8: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [8, 16]}
+ }
module attributes {
spv.target_env =
#spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
@@ -48,7 +54,7 @@
%16 = memref.dim %arg2, %c1 : memref<?x?xf32>
%17 = affine.min #map1()[%1, %16]
%18 = memref.subview %arg2[%3, %10] [%15, %17] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map3>
- linalg.matmul {__internal_linalg_transform__ = "workgroup"}
+ linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config}
ins(%7, %13 : memref<?x?xf32, #map3>, memref<?x?xf32, #map3>)
outs(%18 : memref<?x?xf32, #map3>)
}
@@ -77,6 +83,8 @@
// -----
+#config = {tileSizes = [[1, 4, 32], [], [1, 1, 1]]}
+
hal.executable @conv_1d attributes {sym_visibility = "private"} {
hal.interface @io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -84,7 +92,11 @@
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
- hal.executable.entry_point @conv_1d attributes {interface = @io, ordinal = 0 : index}
+ hal.executable.entry_point @conv_1d attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 4: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+ }
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>} {
func @conv_1d() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
@@ -107,7 +119,7 @@
%15 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 1)>()[%3]
%16 = memref.subview %0[%5, %12, %14] [1, %13, %15] [1, 1, 1] : memref<3x6x1xf32> to memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>
%17 = memref.subview %0[%5, %12, %9] [1, %13, %10] [1, 1, 1] : memref<3x6x1xf32> to memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>
- linalg.conv_1d_input_nwc_filter_wcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%8, %11 : memref<1x?x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 8 + s0 + d1 + d2)>>, memref<3x1x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>) outs(%16 : memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>)
+ linalg.conv_1d_input_nwc_filter_wcf { __internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%8, %11 : memref<1x?x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 8 + s0 + d1 + d2)>>, memref<3x1x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>) outs(%16 : memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>)
return
}
hal.interface @io attributes {sym_visibility = "private"} {
@@ -157,6 +169,8 @@
#map6 = affine_map<(d0)[s0] -> (4, -d0 + s0)>
#map7 = affine_map<(d0)[s0] -> (32, -d0 + s0)>
+#config = {tileSizes = [[0, 1, 4, 32], [], [0, 1, 1, 1]]}
+
hal.executable @conv_no_padding attributes {sym_visibility = "private"} {
hal.interface @io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -164,7 +178,11 @@
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
- hal.executable.entry_point @conv_no_padding attributes {interface = @io, ordinal = 0 : index}
+ hal.executable.entry_point @conv_no_padding attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 4: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+ }
module attributes {
spv.target_env =
#spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
@@ -213,6 +231,7 @@
: memref<?x?x?x?xf32> to memref<?x?x?x?xf32, #map5>
linalg.conv_2d_input_nhwc_filter_hwcf {
__internal_linalg_transform__ = "workgroup",
+ lowering.config = #config,
dilations = dense<1> : tensor<2xi64>,
strides = dense<2> : tensor<2xi64>}
ins(%21, %arg0 : memref<?x?x?x?xf32, #map5>, memref<?x?x?x?xf32>)
@@ -270,6 +289,8 @@
// -----
+#config = {tileSizes = [[0, 0, 1, 4, 32], [], [0, 0, 1, 1, 1]]}
+
hal.executable @conv_3d attributes {sym_visibility = "private"} {
hal.interface @io {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -277,7 +298,11 @@
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
- hal.executable.entry_point @conv_3d attributes {interface = @io, ordinal = 0 : index}
+ hal.executable.entry_point @conv_3d attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 4: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+ }
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>} {
func @conv_3d() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
%cst = constant 0.000000e+00 : f32
@@ -299,7 +324,7 @@
%14 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 7)>()[%3]
%15 = memref.subview %0[%5, %11, %13, 0, 0] [1, %12, %14, 7, 2] [1, 1, 1, 1, 1] : memref<2x7x7x7x2xf32> to memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>
%16 = memref.subview %0[%5, %11, %13, 0, 0] [1, %12, %14, 7, 2] [1, 1, 1, 1, 1] : memref<2x7x7x7x2xf32> to memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>
- linalg.conv_3d_input_ndhwc_filter_dhwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins(%10, %2 : memref<1x?x?x8x3xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 1536 + s0 + d1 * 192 + d2 * 24 + d3 * 3 + d4)>>, memref<2x2x2x3x2xf32>) outs(%15 : memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>)
+ linalg.conv_3d_input_ndhwc_filter_dhwcf {__internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins(%10, %2 : memref<1x?x?x8x3xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 1536 + s0 + d1 * 192 + d2 * 24 + d3 * 3 + d4)>>, memref<2x2x2x3x2xf32>) outs(%15 : memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>)
return
}
hal.interface @io attributes {sym_visibility = "private"} {
@@ -334,6 +359,9 @@
#map5 = affine_map<()[s0] -> (4, s0 * -4 + 14)>
#map6 = affine_map<()[s0] -> (32, s0 * -32 + 13)>
#map7 = affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 1092 + s0 + d1 * 78 + d2 * 6 + d3)>
+
+#config = {tileSizes = [[1, 4, 32], [], [1, 1, 1]]}
+
module {
hal.executable @pooling_nhwc_max attributes {sym_visibility = "private"} {
hal.interface @io {
@@ -342,11 +370,10 @@
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
- hal.executable.entry_point @pooling_nhwc_max attributes {interface = @io, ordinal = 0 : index} {
- ^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors
- %c4 = constant 4 : index
- %c1 = constant 1 : index
- hal.return %c1, %c4, %c1 : index, index, index
+ hal.executable.entry_point @pooling_nhwc_max attributes {
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [32: index, 4: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
func @pooling_nhwc_max() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
@@ -364,7 +391,7 @@
%10 = affine.min #map5()[%4]
%11 = affine.min #map6()[%3]
%12 = memref.subview %2[0, %5, %7, 0] [2, %10, %11, 6] [1, 1, 1, 1] : memref<2x14x13x6xf32> to memref<2x?x?x6xf32, #map7>
- linalg.pooling_nhwc_max {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%9, %1 : memref<2x?x?x6xf32, #map4>, memref<3x4xf32>) outs(%12 : memref<2x?x?x6xf32, #map7>)
+ linalg.pooling_nhwc_max {__internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%9, %1 : memref<2x?x?x6xf32, #map4>, memref<3x4xf32>) outs(%12 : memref<2x?x?x6xf32, #map7>)
return
}
hal.interface @io attributes {sym_visibility = "private"} {
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index 7378c56..1cbb881 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-concretize-workgroup-tiles,iree-spirv-tile-and-vectorize))' -canonicalize -cse -iree-spirv-workgroup-tile-size=1,8,64,4 -iree-spirv-invocation-tile-size=1,8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
+
+#config = {tileSizes = [[1, 8, 64, 4], [], [1, 8, 4, 4]]}
hal.executable @batch_matmul_static_shape attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
@@ -8,8 +10,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @batch_matmul_static_shape attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8, 1]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @batch_matmul_static_shape() {
@@ -43,7 +46,9 @@
%12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1024)>(%arg2)[%workgroup_size_x]
%13 = memref.subview %1[%arg0, 0, %arg2] [%9, 1024, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
%14 = memref.subview %2[%arg0, %arg1, %arg2] [%9, %10, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
- linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+ linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config}
+ ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+ outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
}
}
}
@@ -74,21 +79,29 @@
// CHECK-DAG: %[[C6:.+]] = constant 6 : index
// CHECK-DAG: %[[C7:.+]] = constant 7 : index
// CHECK: %[[BIDX:.+]] = hal.interface.workgroup.id[0]
+// CHECK: %[[BCNTX:.+]] = hal.interface.workgroup.count[0]
// CHECK: %[[BIDY:.+]] = hal.interface.workgroup.id[1]
+// CHECK: %[[BCNTY:.+]] = hal.interface.workgroup.count[1]
// CHECK: %[[BIDZ:.+]] = hal.interface.workgroup.id[2]
-// CHECK-DAG: %[[BOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK-DAG: %[[BOFFSET_X:.+]] = affine.apply #[[MAP1]]()[%[[BIDX]]]
+// CHECK: %[[BCNTZ:.+]] = hal.interface.workgroup.count[2]
+// CHECK: scf.for %[[IVZ:.+]] = %[[BIDZ]] to %{{.+}} step %[[BCNTZ]]
+// CHECK: %[[BOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
+// CHECK: %[[UBY:.+]] = affine.apply #[[MAP0]]()[%[[BCNTY]]]
+// CHECK: scf.for %[[IVY:.+]] = %[[BOFFSET_Y]] to %{{.+}} step %[[UBY]]
+// CHECK: %[[BOFFSET_X:.+]] = affine.apply #[[MAP1]]()[%[[BIDX]]]
+// CHECK: %[[UBX:.+]] = affine.apply #[[MAP1]]()[%[[BCNTX]]]
// CHECK: %[[SUBVIEW_ARG0:.+]] = memref.subview %[[ARG0]]
-// CHECK-SAME: [%[[BIDZ]], %[[BOFFSET_Y]], 0] [1, 8, 1024]
-// CHECK: %[[SUBVIEW_ARG1:.+]] = memref.subview %[[ARG1]]
-// CHECK-SAME: [%[[BIDZ]], 0, %[[BOFFSET_X]]] [1, 1024, 64]
-// CHECK: %[[SUBVIEW_RESULT:.+]] = memref.subview %[[RET0]]
-// CHECK-SAME: [%[[BIDZ]], %[[BOFFSET_Y]], %[[BOFFSET_X]]] [1, 8, 64]
+// CHECK-SAME: [%[[IVZ]], %[[IVY]], 0] [1, 8, 1024]
// CHECK: %[[IIDX:.+]] = "gpu.thread_id"() {dimension = "x"}
// CHECK: %[[IIDY:.+]] = "gpu.thread_id"() {dimension = "y"}
// CHECK: %[[IIDZ:.+]] = "gpu.thread_id"() {dimension = "z"}
// CHECK-DAG: %[[IOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[IIDY]]]
// CHECK-DAG: %[[IOFFSET_X:.+]] = affine.apply #[[MAP2]]()[%[[IIDX]]]
+// CHECK: scf.for %[[IVX:.+]] = %[[BOFFSET_X]] to %{{.+}} step %[[UBX]]
+// CHECK: %[[SUBVIEW_ARG1:.+]] = memref.subview %[[ARG1]]
+// CHECK-SAME: [%[[IVZ]], 0, %[[IVX]]] [1, 1024, 64]
+// CHECK: %[[SUBVIEW_RESULT:.+]] = memref.subview %[[RET0]]
+// CHECK-SAME: [%[[IVZ]], %[[IVY]], %[[IVX]]] [1, 8, 64]
// CHECK: %[[SUBVIEW_RESULT_2:.+]] = memref.subview %[[SUBVIEW_RESULT]]
// CHECK-SAME: [%[[IIDZ]], %[[IOFFSET_Y]], %[[IOFFSET_X]]] [1, 8, 4]
// CHECK-DAG: %[[READ_INIT_0:.+]] = vector.transfer_read
@@ -108,7 +121,7 @@
// CHECK-DAG: %[[READ_INIT_7:.+]] = vector.transfer_read
// CHECK-SAME: %[[SUBVIEW_RESULT_2]][%[[C0]], %[[C7]], %[[C0]]]
-// CHECK: %[[FOR_RES:.+]]:8 = scf.for %[[IV0:.+]] = {{.*}} to
+// CHECK: %[[FOR_RES:.+]]:8 = scf.for %[[IV3:.+]] = {{.*}} to
// CHECK-SAME: iter_args(%[[ACC_0:.+]] = %[[READ_INIT_0]],
// CHECK-SAME: %[[ACC_1:.+]] = %[[READ_INIT_1]],
// CHECK-SAME: %[[ACC_2:.+]] = %[[READ_INIT_2]],
@@ -118,9 +131,9 @@
// CHECK-SAME: %[[ACC_6:.+]] = %[[READ_INIT_6]],
// CHECK-SAME: %[[ACC_7:.+]] = %[[READ_INIT_7]])
// CHECK-DAG: %[[SUBVIEW_LHS:.+]] = memref.subview %[[SUBVIEW_ARG0]]
-// CHECK-SAME: [%[[IIDZ]], %[[IOFFSET_Y]], %[[IV0]]] [1, 8, 4]
+// CHECK-SAME: [%[[IIDZ]], %[[IOFFSET_Y]], %[[IV3]]] [1, 8, 4]
// CHECK-DAG: %[[SUBVIEW_RHS:.+]] = memref.subview %[[SUBVIEW_ARG1]]
-// CHECK-SAME: [%[[IIDZ]], %[[IV0]], %[[IOFFSET_X]]] [1, 4, 4] [1, 1, 1]
+// CHECK-SAME: [%[[IIDZ]], %[[IV3]], %[[IOFFSET_X]]] [1, 4, 4] [1, 1, 1]
// CHECK-DAG: %[[READ_LHS_0:.+]] = vector.transfer_read %[[SUBVIEW_LHS]][%[[C0]], %[[C0]], %[[C0]]]
// CHECK-DAG: %[[READ_LHS_1:.+]] = vector.transfer_read %[[SUBVIEW_LHS]][%[[C0]], %[[C1]], %[[C0]]]
@@ -355,6 +368,8 @@
// -----
+#config = {tileSizes = [[1, 8, 64, 4], [], [1, 8, 4, 4]]}
+
hal.executable @fused_fill_batch_matmul attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -363,8 +378,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @fused_fill_batch_matmul attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8, 1]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @fused_fill_batch_matmul() {
@@ -399,8 +415,8 @@
%12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1024)>(%arg2)[%workgroup_size_x]
%13 = memref.subview %1[%arg0, 0, %arg2] [%9, 1024, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
%14 = memref.subview %2[%arg0, %arg1, %arg2] [%9, %10, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
- linalg.fill(%zero, %14) : f32, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
- linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+ linalg.fill(%zero, %14) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
+ linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
}
}
}
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index 01f71a3..e3c3076 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-concretize-workgroup-tiles,iree-spirv-tile-and-vectorize))' -canonicalize -cse -iree-spirv-workgroup-tile-size=8,64,4 -iree-spirv-invocation-tile-size=8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
+
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
hal.executable @matmul_static_shape_f16 attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
@@ -8,8 +10,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @matmul_static_shape_f16 attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @matmul_static_shape_f16() {
@@ -36,8 +39,8 @@
%9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 4096)>(%arg1)[%workgroup_size_x]
%10 = memref.subview %2[%arg0, %arg1] [%7, %9] [1, 1] : memref<4096x4096xf16> to memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
%11 = memref.subview %1[0, %arg1] [4096, %9] [1, 1] : memref<4096x4096xf16> to memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
- linalg.fill(%cst, %10) {__internal_linalg_transform__ = "workgroup"} : f16, memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
- linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %11 : memref<?x4096xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%10 : memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
+ linalg.fill(%cst, %10) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f16, memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
+ linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %11 : memref<?x4096xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%10 : memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
}
}
return
@@ -63,6 +66,8 @@
// -----
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
+
hal.executable @matmul_static_shape_f32 attributes {sym_visibility = "private"} {
hal.interface @io attributes {sym_visibility = "private"} {
hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -71,8 +76,9 @@
}
hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
hal.executable.entry_point @matmul_static_shape_f32 attributes {
- interface = @io,
- ordinal = 0 : index
+ interface = @io, ordinal = 0 : index,
+ workgroup_size = [16: index, 1: index, 1: index],
+ translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
}
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>} {
func @matmul_static_shape_f32() {
@@ -99,8 +105,8 @@
%9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 4096)>(%arg1)[%workgroup_size_x]
%10 = memref.subview %1[0, %arg1] [4096, %9] [1, 1] : memref<4096x4096xf32> to memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
%11 = memref.subview %2[%arg0, %arg1] [%7, %9] [1, 1] : memref<4096x4096xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
- linalg.fill(%cst, %11) : f32, memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
- linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %10 : memref<?x4096xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%11 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
+ linalg.fill(%cst, %11) {__internal_linalg_transform__ = "workgroup", lowering.config = #config}: f32, memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
+ linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %10 : memref<?x4096xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%11 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
}
}
return
diff --git a/iree/compiler/Codegen/Utils/Utils.cpp b/iree/compiler/Codegen/Utils/Utils.cpp
index c062645..912e61e 100644
--- a/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/iree/compiler/Codegen/Utils/Utils.cpp
@@ -29,8 +29,7 @@
}
IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp) {
- auto variantOp =
- funcOp.getOperation()->getParentOfType<IREE::HAL::ExecutableVariantOp>();
+ auto variantOp = funcOp->getParentOfType<IREE::HAL::ExecutableVariantOp>();
for (auto op : variantOp.getOps<IREE::HAL::ExecutableEntryPointOp>()) {
if (op.sym_name() == funcOp.getName()) {
return op;
@@ -41,8 +40,7 @@
llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> getAllEntryPoints(
ModuleOp module) {
- auto variantOp =
- module.getOperation()->getParentOfType<IREE::HAL::ExecutableVariantOp>();
+ auto variantOp = module->getParentOfType<IREE::HAL::ExecutableVariantOp>();
llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPointOps;
for (auto op : variantOp.getOps<IREE::HAL::ExecutableEntryPointOp>()) {
entryPointOps[op.sym_name()] = op;