Restructure launch_configuration tests into per-target files. (#8882)
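
The single materialize_launch_configuration.mlir mixed test cases for
several CPU targets. Move the aarch64 and riscv cases out into
materialize_aarch64_launch_configuration.mlir and
materialize_riscv_launch_configuration.mlir, and rename the remaining
file to materialize_x86_64_launch_configuration.mlir. The BUILD and
CMakeLists.txt test lists are updated to match. While moving the riscv
case, rename its variant from @embedded_elf_x86_64 to
@embedded_elf_riscv_32 so the name matches its target.

Each new file keeps the same RUN line, so one target's tests can be
checked in isolation, e.g. (a sketch, assuming iree-opt and FileCheck
are on PATH and paths are relative to the repo root):

  iree-opt \
    -pass-pipeline='hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{test-lowering-configuration=true}))' \
    -split-input-file \
    iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir \
  | FileCheck iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir
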
diff --git a/iree/compiler/Codegen/LLVMCPU/test/BUILD b/iree/compiler/Codegen/LLVMCPU/test/BUILD
index 910be59..fd83660 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/BUILD
+++ b/iree/compiler/Codegen/LLVMCPU/test/BUILD
@@ -26,7 +26,9 @@
"hal_interface_workgroup_info.mlir",
"illegal_configuration.mlir",
"linalg_transform.mlir",
- "materialize_launch_configuration.mlir",
+ "materialize_aarch64_launch_configuration.mlir",
+ "materialize_riscv_launch_configuration.mlir",
+ "materialize_x86_64_launch_configuration.mlir",
"synchronize_symbol_visibility.mlir",
"test_config_mmt4d.mlir",
"tile_fuse_and_vectorize.mlir",
diff --git a/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
index 6d7d25a..6f6cec0 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
+++ b/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
@@ -20,7 +20,9 @@
"hal_interface_workgroup_info.mlir"
"illegal_configuration.mlir"
"linalg_transform.mlir"
- "materialize_launch_configuration.mlir"
+ "materialize_aarch64_launch_configuration.mlir"
+ "materialize_riscv_launch_configuration.mlir"
+ "materialize_x86_64_launch_configuration.mlir"
"synchronize_symbol_visibility.mlir"
"test_config_mmt4d.mlir"
"tile_fuse_and_vectorize.mlir"
diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir
new file mode 100644
index 0000000..4be2ead
--- /dev/null
+++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir
@@ -0,0 +1,250 @@
+// RUN: iree-opt -pass-pipeline='hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{test-lowering-configuration=true}))' -split-input-file %s | FileCheck %s
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>,
+ #hal.descriptor_set.binding<3, storage_buffer>
+ ]>
+]>
+hal.executable private @matmul_tensors {
+ hal.executable.variant @llvm, target = <"llvm", "embedded-elf-arm_64", {
+ data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
+ native_vector_size = 16 : index,
+ target_triple = "aarch64-unknown-unknown-eabi-elf"
+ }> {
+ hal.executable.entry_point @matmul_tensors layout(#executable_layout)
+ builtin.module {
+ func.func @matmul_tensors() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %M = hal.interface.constant.load[0] : index
+ %N = hal.interface.constant.load[1] : index
+ %K = hal.interface.constant.load[2] : index
+ %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %K}
+ %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%K, %N}
+ %init_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %N}
+ %result_binding = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer)
+ : !flow.dispatch.tensor<writeonly:?x?xf32>{%M, %N}
+ %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %K} -> tensor<?x?xf32>
+ %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [%K, %N], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%K, %N} -> tensor<?x?xf32>
+ %init = flow.dispatch.tensor.load %init_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %N} -> tensor<?x?xf32>
+ %gemm = linalg.matmul ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
+ flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
+ : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32>{%M, %N}
+ return
+ }
+ }
+ }
+}
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [16, 4, 64], [4, 4, 4]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
+// CHECK: hal.executable.entry_point public @matmul_tensors
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.matmul
+// CHECK-SAME: lowering_config = #[[CONFIG]]
+
+// -----
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+hal.executable private @batch_matmul_tensors {
+ hal.executable.variant @llvm, target = <"llvm", "embedded-elf-arm_64", {
+ data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
+ native_vector_size = 16 : index,
+ target_triple = "aarch64-unknown-unknown-eabi-elf"
+ }> {
+ hal.executable.entry_point @batch_matmul_tensors layout(#executable_layout)
+ builtin.module {
+ func.func @batch_matmul_tensors() {
+ %cst = arith.constant 0.000000e+00 : f32
+ %B = hal.interface.constant.load[0] : index
+ %M = hal.interface.constant.load[1] : index
+ %N = hal.interface.constant.load[2] : index
+ %K = hal.interface.constant.load[3] : index
+ %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(32)
+ : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %M, %K}
+ %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(32)
+ : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %K, %N}
+ %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(32)
+ : !flow.dispatch.tensor<writeonly:?x?x?xf32>{%B, %M, %N}
+ %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0, 0], sizes = [%B, %M, %K], strides = [1, 1, 1]
+ : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %M, %K} -> tensor<?x?x?xf32>
+ %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0, 0], sizes = [%B, %K, %N], strides = [1, 1, 1]
+ : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %K, %N} -> tensor<?x?x?xf32>
+ %init = linalg.init_tensor [%B, %M, %N] : tensor<?x?x?xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ %batch_gemm = linalg.batch_matmul
+ ins(%lhs, %rhs : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%fill : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ flow.dispatch.tensor.store %batch_gemm, %result_binding, offsets = [0, 0, 0], sizes = [%B, %M, %N], strides = [1, 1, 1]
+ : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?x?xf32>{%B, %M, %N}
+ return
+ }
+ }
+ }
+}
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 64, 64, 0], [1, 16, 4, 64], [1, 4, 4, 4]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
+// CHECK: hal.executable.entry_point public @batch_matmul_tensors
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.batch_matmul
+// CHECK-SAME: lowering_config = #[[CONFIG]]
+
+// -----
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+hal.executable private @matmul_static {
+ hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
+ data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
+ native_vector_size = 16 : index,
+ target_triple = "aarch64-none-linux-android30"
+ }> {
+ hal.executable.entry_point public @matmul_static layout(#executable_layout)
+ builtin.module {
+ func.func @matmul_static() {
+ %cst = arith.constant 0.0 : f32
+ %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:196x240xf32>
+ %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:240x40xf32>
+ %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
+ : !flow.dispatch.tensor<writeonly:196x40xf32>
+ %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [196, 240], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:196x240xf32> -> tensor<196x240xf32>
+ %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [240, 40], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:240x40xf32> -> tensor<240x40xf32>
+ %init = linalg.init_tensor [196, 40] : tensor<196x40xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<196x40xf32>) -> tensor<196x40xf32>
+ %gemm = linalg.matmul ins(%lhs, %rhs : tensor<196x240xf32>, tensor<240x40xf32>)
+ outs(%fill : tensor<196x40xf32>) -> tensor<196x40xf32>
+ flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [196, 40], strides = [1, 1]
+ : tensor<196x40xf32> -> !flow.dispatch.tensor<writeonly:196x40xf32>
+ return
+ }
+ }
+ }
+}
+
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[49, 8, 0], [7, 4, 60], [4, 4, 4]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
+// CHECK: hal.executable.entry_point public @matmul_static
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.matmul
+// CHECK-SAME: lowering_config = #[[CONFIG]]
+
+// -----
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+hal.executable private @restrict_num_workgroups {
+ hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
+ data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
+ native_vector_size = 16 : index,
+ target_triple = "aarch64-none-linux-android30"
+ }> {
+ hal.executable.entry_point public @restrict_num_workgroups layout(#executable_layout)
+ builtin.module {
+ func.func @restrict_num_workgroups() {
+ %cst = arith.constant 0.000000e+00 : f32
+ %input_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:1x11x11x576xf32>
+ %filter_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
+ : !flow.dispatch.tensor<readonly:5x5x576xf32>
+ %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
+ : !flow.dispatch.tensor<writeonly:1x7x7x576xf32>
+ %input = flow.dispatch.tensor.load %input_binding, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1]
+ : !flow.dispatch.tensor<readonly:1x11x11x576xf32> -> tensor<1x11x11x576xf32>
+ %filter = flow.dispatch.tensor.load %filter_binding, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1]
+ : !flow.dispatch.tensor<readonly:5x5x576xf32> -> tensor<5x5x576xf32>
+ %init = linalg.init_tensor [1, 7, 7, 576] : tensor<1x7x7x576xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
+ %conv = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+ ins(%input, %filter : tensor<1x11x11x576xf32>, tensor<5x5x576xf32>)
+ outs(%fill : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
+ flow.dispatch.tensor.store %conv, %result_binding, offsets = [0, 0, 0, 0], sizes = [1, 7, 7, 576], strides = [1, 1, 1, 1]
+ : tensor<1x7x7x576xf32> -> !flow.dispatch.tensor<writeonly:1x7x7x576xf32>
+ return
+ }
+ }
+ }
+}
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 1, 7, 64, 0, 0], [1, 1, 7, 8, 0, 0], [0, 0, 0, 0, 1, 1]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
+// CHECK: hal.executable.entry_point public @restrict_num_workgroups
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
+// CHECK-SAME: lowering_config = #[[CONFIG]]
+
+
+// -----
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+hal.executable private @matmul_aarch_i8_i8_i32 {
+ hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
+ data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
+ native_vector_size = 16 : index,
+ target_triple = "aarch64-none-linux-android30"
+ }> {
+ hal.executable.entry_point public @matmul_aarch_i8_i8_i32 layout(#executable_layout)
+ builtin.module {
+ func.func @matmul_aarch_i8_i8_i32() {
+ %c0 = arith.constant 0 : index
+ %M = hal.interface.constant.load[0] : index
+ %N = hal.interface.constant.load[1] : index
+ %K = hal.interface.constant.load[2] : index
+ %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(32)
+ : !flow.dispatch.tensor<readonly:?x?xi8>{%M, %K}
+ %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(32)
+ : !flow.dispatch.tensor<readonly:?x?xi8>{%K, %N}
+ %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(32)
+ : !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N}
+ %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:?x?xi8>{%M, %K} -> tensor<?x?xi8>
+ %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [%K, %N], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:?x?xi8>{%K, %N} -> tensor<?x?xi8>
+ %init = flow.dispatch.tensor.load %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
+ : !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N} -> tensor<?x?xi32>
+ %gemm = linalg.matmul ins(%lhs, %rhs : tensor<?x?xi8>, tensor<?x?xi8>) outs(%init : tensor<?x?xi32>) -> tensor<?x?xi32>
+ flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
+ : tensor<?x?xi32> -> !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N}
+ return
+ }
+ }
+ }
+}
+
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [4, 16, 0], [0, 0, 4]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
+// CHECK: hal.executable.entry_point public @matmul_aarch_i8_i8_i32
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.matmul
+// CHECK-SAME: lowering_config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir
new file mode 100644
index 0000000..248fc96
--- /dev/null
+++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir
@@ -0,0 +1,47 @@
+// RUN: iree-opt -pass-pipeline='hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{test-lowering-configuration=true}))' -split-input-file %s | FileCheck %s
+
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
+ #hal.descriptor_set.layout<0, bindings = [
+ #hal.descriptor_set.binding<0, storage_buffer>,
+ #hal.descriptor_set.binding<1, storage_buffer>,
+ #hal.descriptor_set.binding<2, storage_buffer>
+ ]>
+]>
+hal.executable private @matmul_riscv {
+ hal.executable.variant public @embedded_elf_riscv_32, target = #hal.executable.target<
+ "llvm",
+ "embedded-elf-riscv_32", {
+ cpu_features = "+m,+f",
+ data_layout = "e-m:e-p:32:32-i64:64-n32-S128",
+ native_vector_size = 0 : index,
+ target_triple = "riscv32-unknown-unknown-eabi-elf"
+ }> {
+ hal.executable.entry_point public @matmul_riscv layout(#executable_layout)
+ builtin.module {
+ func.func @matmul_riscv() {
+ %cst = arith.constant 0.0 : f32
+ %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:384x512xf32>
+ %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<readonly:512x128xf32>
+ %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : !flow.dispatch.tensor<writeonly:384x128xf32>
+ %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [384, 512], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:384x512xf32> -> tensor<384x512xf32>
+ %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [512, 128], strides = [1, 1]
+ : !flow.dispatch.tensor<readonly:512x128xf32> -> tensor<512x128xf32>
+ %init = linalg.init_tensor [384, 128] : tensor<384x128xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<384x128xf32>) -> tensor<384x128xf32>
+ %gemm = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>)
+ outs(%fill : tensor<384x128xf32>) -> tensor<384x128xf32>
+ flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [384, 128], strides = [1, 1]
+ : tensor<384x128xf32> -> !flow.dispatch.tensor<writeonly:384x128xf32>
+ return
+ }
+ }
+ }
+}
+
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [8, 32, 0], [0, 0, 16]{{\]}}>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
+// CHECK: hal.executable.entry_point public @matmul_riscv
+// CHECK-SAME: translation_info = #[[TRANSLATION]]
+// CHECK: linalg.matmul
+// CHECK-SAME: lowering_config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
similarity index 78%
rename from iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
rename to iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
index f9c3e3c..8281ba3 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
@@ -4,59 +4,6 @@
#hal.descriptor_set.layout<0, bindings = [
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
-]>
-hal.executable private @matmul_tensors {
- hal.executable.variant @llvm, target = <"llvm", "embedded-elf-arm_64", {
- data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
- native_vector_size = 16 : index,
- target_triple = "aarch64-unknown-unknown-eabi-elf"
- }> {
- hal.executable.entry_point @matmul_tensors layout(#executable_layout)
- builtin.module {
- func.func @matmul_tensors() {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %M = hal.interface.constant.load[0] : index
- %N = hal.interface.constant.load[1] : index
- %K = hal.interface.constant.load[2] : index
- %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %K}
- %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:?x?xf32>{%K, %N}
- %init_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %N}
- %result_binding = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer)
- : !flow.dispatch.tensor<writeonly:?x?xf32>{%M, %N}
- %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %K} -> tensor<?x?xf32>
- %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [%K, %N], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:?x?xf32>{%K, %N} -> tensor<?x?xf32>
- %init = flow.dispatch.tensor.load %init_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:?x?xf32>{%M, %N} -> tensor<?x?xf32>
- %gemm = linalg.matmul ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
- flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
- : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32>{%M, %N}
- return
- }
- }
- }
-}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [16, 4, 64], [4, 4, 4]{{\]}}>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
-// CHECK: hal.executable.entry_point public @matmul_tensors
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.matmul
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-// -----
-
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
#hal.descriptor_set.binding<2, storage_buffer>
]>
]>
@@ -255,57 +202,6 @@
// -----
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
-hal.executable private @batch_matmul_tensors {
- hal.executable.variant @llvm, target = <"llvm", "embedded-elf-arm_64", {
- data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
- native_vector_size = 16 : index,
- target_triple = "aarch64-unknown-unknown-eabi-elf"
- }> {
- hal.executable.entry_point @batch_matmul_tensors layout(#executable_layout)
- builtin.module {
- func.func @batch_matmul_tensors() {
- %cst = arith.constant 0.000000e+00 : f32
- %B = hal.interface.constant.load[0] : index
- %M = hal.interface.constant.load[1] : index
- %N = hal.interface.constant.load[2] : index
- %K = hal.interface.constant.load[3] : index
- %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(32)
- : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %M, %K}
- %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(32)
- : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %K, %N}
- %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(32)
- : !flow.dispatch.tensor<writeonly:?x?x?xf32>{%B, %M, %N}
- %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0, 0], sizes = [%B, %M, %K], strides = [1, 1, 1]
- : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %M, %K} -> tensor<?x?x?xf32>
- %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0, 0], sizes = [%B, %K, %N], strides = [1, 1, 1]
- : !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %K, %N} -> tensor<?x?x?xf32>
- %init = linalg.init_tensor [%B, %M, %N] : tensor<?x?x?xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
- %batch_gemm = linalg.batch_matmul
- ins(%lhs, %rhs : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%fill : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
- flow.dispatch.tensor.store %batch_gemm, %result_binding, offsets = [0, 0, 0], sizes = [%B, %M, %N], strides = [1, 1, 1]
- : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?x?xf32>{%B, %M, %N}
- return
- }
- }
- }
-}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 64, 64, 0], [1, 16, 4, 64], [1, 4, 4, 4]{{\]}}>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
-// CHECK: hal.executable.entry_point public @batch_matmul_tensors
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.batch_matmul
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-// -----
-
#compilation = #iree_codegen.compilation_info<
lowering_config = <tile_sizes = [[64, 64, 0], [32, 32, 0], [0, 0, 32]]>,
translation_info = <CPUDoubleTilingExpert>,
@@ -735,103 +631,6 @@
#hal.descriptor_set.binding<2, storage_buffer>
]>
]>
-hal.executable private @matmul_static {
- hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
- data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
- native_vector_size = 16 : index,
- target_triple = "aarch64-none-linux-android30"
- }> {
- hal.executable.entry_point public @matmul_static layout(#executable_layout)
- builtin.module {
- func.func @matmul_static() {
- %cst = arith.constant 0.0 : f32
- %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:196x240xf32>
- %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:240x40xf32>
- %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
- : !flow.dispatch.tensor<writeonly:196x40xf32>
- %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [196, 240], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:196x240xf32> -> tensor<196x240xf32>
- %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [240, 40], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:240x40xf32> -> tensor<240x40xf32>
- %init = linalg.init_tensor [196, 40] : tensor<196x40xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<196x40xf32>) -> tensor<196x40xf32>
- %gemm = linalg.matmul ins(%lhs, %rhs : tensor<196x240xf32>, tensor<240x40xf32>)
- outs(%fill : tensor<196x40xf32>) -> tensor<196x40xf32>
- flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [196, 40], strides = [1, 1]
- : tensor<196x40xf32> -> !flow.dispatch.tensor<writeonly:196x40xf32>
- return
- }
- }
- }
-}
-
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[49, 8, 0], [7, 4, 60], [4, 4, 4]{{\]}}>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUTileFuseAndVectorize>
-// CHECK: hal.executable.entry_point public @matmul_static
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.matmul
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-// -----
-
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
-hal.executable private @restrict_num_workgroups {
- hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
- data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
- native_vector_size = 16 : index,
- target_triple = "aarch64-none-linux-android30"
- }> {
- hal.executable.entry_point public @restrict_num_workgroups layout(#executable_layout)
- builtin.module {
- func.func @restrict_num_workgroups() {
- %cst = arith.constant 0.000000e+00 : f32
- %input_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:1x11x11x576xf32>
- %filter_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
- : !flow.dispatch.tensor<readonly:5x5x576xf32>
- %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer)
- : !flow.dispatch.tensor<writeonly:1x7x7x576xf32>
- %input = flow.dispatch.tensor.load %input_binding, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1]
- : !flow.dispatch.tensor<readonly:1x11x11x576xf32> -> tensor<1x11x11x576xf32>
- %filter = flow.dispatch.tensor.load %filter_binding, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1]
- : !flow.dispatch.tensor<readonly:5x5x576xf32> -> tensor<5x5x576xf32>
- %init = linalg.init_tensor [1, 7, 7, 576] : tensor<1x7x7x576xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
- %conv = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
- ins(%input, %filter : tensor<1x11x11x576xf32>, tensor<5x5x576xf32>)
- outs(%fill : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
- flow.dispatch.tensor.store %conv, %result_binding, offsets = [0, 0, 0, 0], sizes = [1, 7, 7, 576], strides = [1, 1, 1, 1]
- : tensor<1x7x7x576xf32> -> !flow.dispatch.tensor<writeonly:1x7x7x576xf32>
- return
- }
- }
- }
-}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 1, 7, 64, 0, 0], [1, 1, 7, 8, 0, 0], [0, 0, 0, 0, 1, 1]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
-// CHECK: hal.executable.entry_point public @restrict_num_workgroups
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-
-// -----
-
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
hal.executable private @matmul_x86 {
hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
"llvm",
@@ -872,54 +671,6 @@
// -----
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
-hal.executable private @matmul_riscv {
- hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
- "llvm",
- "embedded-elf-riscv_32", {
- cpu_features = "+m,+f",
- data_layout = "e-m:e-p:32:32-i64:64-n32-S128",
- native_vector_size = 0 : index,
- target_triple = "riscv32-unknown-unknown-eabi-elf"
- }> {
- hal.executable.entry_point public @matmul_riscv layout(#executable_layout)
- builtin.module {
- func.func @matmul_riscv() {
- %cst = arith.constant 0.0 : f32
- %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:384x512xf32>
- %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<readonly:512x128xf32>
- %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : !flow.dispatch.tensor<writeonly:384x128xf32>
- %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [384, 512], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:384x512xf32> -> tensor<384x512xf32>
- %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [512, 128], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:512x128xf32> -> tensor<512x128xf32>
- %init = linalg.init_tensor [384, 128] : tensor<384x128xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<384x128xf32>) -> tensor<384x128xf32>
- %gemm = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>)
- outs(%fill : tensor<384x128xf32>) -> tensor<384x128xf32>
- flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [384, 128], strides = [1, 1]
- : tensor<384x128xf32> -> !flow.dispatch.tensor<writeonly:384x128xf32>
- return
- }
- }
- }
-}
-
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [8, 32, 0], [0, 0, 16]{{\]}}>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
-// CHECK: hal.executable.entry_point public @matmul_riscv
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.matmul
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-// -----
-
#executable_layout = #hal.executable.layout<push_constants = 4, sets = [
#hal.descriptor_set.layout<0, bindings = [
#hal.descriptor_set.binding<0, storage_buffer>,
@@ -1030,56 +781,6 @@
// -----
-#executable_layout = #hal.executable.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
-hal.executable private @matmul_aarch_i8_i8_i32 {
- hal.executable.variant public @system_elf_arm_64, target = <"llvm", "system-elf-arm_64", {
- data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
- native_vector_size = 16 : index,
- target_triple = "aarch64-none-linux-android30"
- }> {
- hal.executable.entry_point public @matmul_aarch_i8_i8_i32 layout(#executable_layout)
- builtin.module {
- func.func @matmul_aarch_i8_i8_i32() {
- %c0 = arith.constant 0 : index
- %M = hal.interface.constant.load[0] : index
- %N = hal.interface.constant.load[1] : index
- %K = hal.interface.constant.load[2] : index
- %lhs_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(32)
- : !flow.dispatch.tensor<readonly:?x?xi8>{%M, %K}
- %rhs_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(32)
- : !flow.dispatch.tensor<readonly:?x?xi8>{%K, %N}
- %result_binding = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(32)
- : !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N}
- %lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:?x?xi8>{%M, %K} -> tensor<?x?xi8>
- %rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [%K, %N], strides = [1, 1]
- : !flow.dispatch.tensor<readonly:?x?xi8>{%K, %N} -> tensor<?x?xi8>
- %init = flow.dispatch.tensor.load %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
- : !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N} -> tensor<?x?xi32>
- %gemm = linalg.matmul ins(%lhs, %rhs : tensor<?x?xi8>, tensor<?x?xi8>) outs(%init : tensor<?x?xi32>) -> tensor<?x?xi32>
- flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [%M, %N], strides = [1, 1]
- : tensor<?x?xi32> -> !flow.dispatch.tensor<readwrite:?x?xi32>{%M, %N}
- return
- }
- }
- }
-}
-
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [4, 16, 0], [0, 0, 4]{{\]}}>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
-// CHECK: hal.executable.entry_point public @matmul_aarch_i8_i8_i32
-// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK: linalg.matmul
-// CHECK-SAME: lowering_config = #[[CONFIG]]
-
-// -----
-
#executable_layout = #hal.executable.layout<push_constants = 4, sets = [
#hal.descriptor_set.layout<0, bindings = [
#hal.descriptor_set.binding<0, storage_buffer>,