[CPU] Add an e2e test for i4 store emulation. (#15539)
Fixes https://github.com/openxla/iree/issues/15369
diff --git a/tests/e2e/linalg/BUILD.bazel b/tests/e2e/linalg/BUILD.bazel
index a4a7e55..fa856fd 100644
--- a/tests/e2e/linalg/BUILD.bazel
+++ b/tests/e2e/linalg/BUILD.bazel
@@ -24,9 +24,16 @@
"i4_to_f32.mlir",
],
include = ["*.mlir"],
- exclude = ["large_linalg_matmul.mlir"],
+ exclude = [
+ "large_linalg_matmul.mlir",
+ "f32_to_i4.mlir",
+ ],
)
+LLVM_I4_SRCS = [
+ "f32_to_i4.mlir",
+]
+
iree_check_single_backend_test_suite(
name = "check_llvm-cpu_local-task",
srcs = LLVM_SRCS,
@@ -35,6 +42,18 @@
)
iree_check_single_backend_test_suite(
+ name = "check_i4_llvm-cpu_local-task",
+ srcs = LLVM_I4_SRCS,
+ driver = "local-task",
+ tags = [
+ # TODO(#15540): RISC-V needs sub-byte emulation for vector.maskedstore
+ # ops. Enable the test after it is supported.
+ "noriscv",
+ ],
+ target_backend = "llvm-cpu",
+)
+
+iree_check_single_backend_test_suite(
name = "check_winograd_llvm-cpu_local-task",
srcs = LLVM_SRCS,
compiler_flags = [
@@ -52,6 +71,7 @@
exclude = [
"large_linalg_matmul.mlir",
"i4_to_f32.mlir",
+ "f32_to_i4.mlir",
],
)
@@ -68,7 +88,10 @@
"i4_to_f32.mlir",
],
include = ["*.mlir"],
- exclude = ["large_linalg_matmul.mlir"],
+ exclude = [
+ "large_linalg_matmul.mlir",
+ "f32_to_i4.mlir",
+ ],
)
iree_check_single_backend_test_suite(
diff --git a/tests/e2e/linalg/CMakeLists.txt b/tests/e2e/linalg/CMakeLists.txt
index 7f2e9b5..6d83754 100644
--- a/tests/e2e/linalg/CMakeLists.txt
+++ b/tests/e2e/linalg/CMakeLists.txt
@@ -24,6 +24,19 @@
iree_check_single_backend_test_suite(
NAME
+ check_i4_llvm-cpu_local-task
+ SRCS
+ "f32_to_i4.mlir"
+ TARGET_BACKEND
+ "llvm-cpu"
+ DRIVER
+ "local-task"
+ LABELS
+ "noriscv"
+)
+
+iree_check_single_backend_test_suite(
+ NAME
check_winograd_llvm-cpu_local-task
SRCS
"conv2d.mlir"
diff --git a/tests/e2e/linalg/f32_to_i4.mlir b/tests/e2e/linalg/f32_to_i4.mlir
new file mode 100644
index 0000000..5abf4cc
--- /dev/null
+++ b/tests/e2e/linalg/f32_to_i4.mlir
@@ -0,0 +1,24 @@
+func.func @f32_to_i4_1d() {
+ %input = util.unfoldable_constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> : tensor<8xf32>
+ %init0 = tensor.empty() : tensor<8xi4>
+ %res = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]}
+ ins(%input : tensor<8xf32>) outs(%init0 : tensor<8xi4>) {
+ ^bb0(%in: f32, %out: i4):
+ %2 = arith.fptoui %in : f32 to i32
+ %3 = arith.trunci %2 : i32 to i4
+ linalg.yield %3 : i4
+ } -> tensor<8xi4>
+
+ // TODO(#14996): Remove the signed extention and directly check with i4 types.
+ %blocker = util.optimization_barrier %res : tensor<8xi4>
+ %init1 = tensor.empty() : tensor<8xi8>
+ %exti8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]}
+ ins(%blocker : tensor<8xi4>) outs(%init1 : tensor<8xi8>) {
+ ^bb0(%in: i4, %out: i8):
+ %2 = arith.extsi %in : i4 to i8
+ linalg.yield %2 : i8
+ } -> tensor<8xi8>
+
+ check.expect_eq_const(%exti8, dense<[0, 1, 2, 3, 4, 5, 6, 7]> : tensor<8xi8>) : tensor<8xi8>
+ return
+}