e2e matmul test improvements (#18725)
This PR is split into individual commits for review convenience, and so we
can drop anything that causes problems on CI.
* Add default shapes set, combining small and large.
* Explicitly requesting "small" or "large" shapes is only genuinely needed in
a minority of cases; that's a change from when these tests were first added.
(A minimal sketch of the new "default" collection follows this list.)
* Enable dynamic sizes in large shapes, leaving only gpu_large_aligned
out.
* Who remembered that large shapes weren't tested as dynamic shapes,
unlike small shapes... and unlike "gpu_large" shapes?!
* Rename gpu_large_aligned -> easy_large_static.
* This is only needed in sketchy GPU codegen pipelines that can't deal
with sizes that aren't multiples of some internal tile size.
* Fold gpu_large into large and tolerate fuzzy bf16 accumulators.
* This retains the evidently more curated set of shapes from "gpu_large". The
larger sizes ran into new numerical issues in the mostly artificial case of
bf16 accumulators (see the bf16 rounding sketch after this list).
* Use default shapes and reenable sanitizers.
* This simplifies the build, reduces the number of targets, and increases
coverage, as "default" combines small and large shapes. It also reenables
sanitizers that had been disabled on large sizes due to timeouts. Since the
tests at some point started verifying only a subset of result matrix elements,
those timeouts should no longer occur.
* Enable default shapes for most rocm tests.
* The motivation for this PR. The rest just bubbled up from there.
* Make large shapes more diverse (including odd and rectangular kinds of
shapes).
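
For reference, here is a minimal sketch (not part of this PR, with illustrative stand-in shape lists) of how the new "default" collection works: it is simply the concatenation of the small and large collections, and `--shapes` is now optional, defaulting to "default". The real logic lives in `generate_e2e_matmul_tests.py` in the diff below.

```python
import enum

@enum.unique
class ShapesId(enum.Enum):
    DEFAULT = "default"
    SMALL = "small"
    LARGE = "large"

# Illustrative stand-ins; the real lists live in generate_e2e_matmul_tests.py.
SMALL_SHAPES = [(1, 1, 1), (8, 8, 8)]
LARGE_SHAPES = [(457, 330, 512), (1000, 4, 512)]

def get_test_shapes(shapes_id: ShapesId):
    # "default" is just small + large, so targets that drop --shapes
    # get both collections for free.
    if shapes_id == ShapesId.DEFAULT:
        return get_test_shapes(ShapesId.SMALL) + get_test_shapes(ShapesId.LARGE)
    if shapes_id == ShapesId.SMALL:
        return SMALL_SHAPES
    if shapes_id == ShapesId.LARGE:
        return LARGE_SHAPES
    raise ValueError(shapes_id)

print(len(get_test_shapes(ShapesId.DEFAULT)))  # 4 == len(small) + len(large)
```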
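
And a minimal sketch (an illustration, not part of this PR) of why bf16 accumulators force fuzzy compares, mirroring the relaxed comparison added to test_utils.c below: bf16 keeps only about 8 bits of significand precision, so once accumulator values grow past a few hundred, consecutive integers are no longer exactly representable and bitwise-equal comparison breaks down.

```python
import struct

def round_to_bf16(x: float) -> float:
    # Approximate bf16 by truncating a float32 to its top 16 bits
    # (round-toward-zero); enough to show the loss of integer precision.
    bits = struct.unpack("<I", struct.pack("<f", x))[0]
    return struct.unpack("<f", struct.pack("<I", bits & 0xFFFF0000))[0]

print(round_to_bf16(100.0))  # 100.0 -- small accumulator values survive exactly
print(round_to_bf16(457.0))  # 456.0 -- larger ones get rounded, hence fuzzy compares
```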
---------
Signed-off-by: Benoit Jacob <jacob.benoit.1@gmail.com>
diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel
index e712f9f..17b2757 100644
--- a/tests/e2e/matmul/BUILD.bazel
+++ b/tests/e2e/matmul/BUILD.bazel
@@ -26,7 +26,7 @@
# LLVMCPU, non-data-tiling, no microkernels
[iree_generated_e2e_runner_test(
- name = "e2e_matmul_cpu_nondt_%s_%s_%s" % (lhs_rhs_type, acc_type, size),
+ name = "e2e_matmul_cpu_nondt_%s_%s" % (lhs_rhs_type, acc_type),
compiler_flags = [
"--iree-opt-data-tiling=false",
"--iree-llvmcpu-enable-ukernels=none",
@@ -37,7 +37,6 @@
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
- "--shapes=%s" % size,
],
tags = [
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
@@ -60,9 +59,6 @@
# TODO(#15258): enable bf16 tests when that bug is fixed.
# ("bf16", "bf16"),
# ("bf16", "f32"),
-] for size in [
- "small",
- "large",
]]
PREPROCESSING_TRANSPOSE_LHS = "--iree-preprocessing-pass-pipeline=builtin.module\\(util.func\\(iree-preprocessing-transpose-matmul-pass{input=lhs}\\)\\)"
@@ -71,9 +67,8 @@
# LLVMCPU, non-data-tiling, no microkernels, ArmSME
[iree_generated_e2e_runner_test(
- name = "e2e_matmul_cpu_arm_sme_nondt_%s_%s%s%s" % (
+ name = "e2e_matmul_cpu_arm_sme_nondt_%s_%s%s" % (
dtype,
- size,
"_transpose_lhs" if transpose_lhs else "",
"_peel" if peel else "",
),
@@ -87,7 +82,6 @@
generator_args = [
"--lhs_rhs_type=%s" % dtype,
"--acc_type=%s" % dtype,
- "--shapes=%s" % size,
],
tags = [
"requires-arm-sme",
@@ -101,9 +95,6 @@
) for dtype in [
"f32",
# "f64" (also supported for ArmSME, but not by the test generator)
-] for size in [
- "small",
- "large",
] for transpose_lhs in [
True,
False,
@@ -137,11 +128,10 @@
# LLVMCPU, data-tiling, data-tiling + ukernels.
[iree_generated_e2e_runner_test(
- name = "e2e_matmul_cpu_dt%s_%s_%s_%s" % (
+ name = "e2e_matmul_cpu_dt%s_%s_%s" % (
("_uk" if use_uk else ""),
lhs_rhs_type,
acc_type,
- size,
),
compiler_flags = [
"--iree-opt-data-tiling",
@@ -150,14 +140,8 @@
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
- "--shapes=%s" % size,
],
tags = ([
- # "--shapes=large" can cause timeouts on sanitizers.
- "noasan",
- "notsan",
- ] if size == "large" else []) + ([
- # "--shapes=large" can cause timeouts on RISC-V emulator.
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
"noriscv",
"nowasm",
@@ -206,18 +190,14 @@
("bf16", "bf16"),
("bf16", "f32"),
]
-) for size in [
- "small",
- "large",
-]]
+)]
# LLVMCPU, data-tiling, data-tiling + ukernels + late materialization.
[iree_generated_e2e_runner_test(
- name = "e2e_matmul_cpu_experimental_dt%s_%s_%s_%s" % (
+ name = "e2e_matmul_cpu_experimental_dt%s_%s_%s" % (
("_uk" if use_uk else ""),
lhs_rhs_type,
acc_type,
- size,
),
compiler_flags = [
"--iree-opt-data-tiling",
@@ -227,14 +207,8 @@
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
- "--shapes=%s" % size,
],
tags = ([
- # "--shapes=large" can cause timeouts on sanitizers.
- "noasan",
- "notsan",
- ] if size == "large" else []) + ([
- # "--shapes=large" can cause timeouts on RISC-V emulator.
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
"noriscv",
"nowasm",
@@ -283,10 +257,7 @@
("bf16", "bf16"),
("bf16", "f32"),
]
-) for size in [
- "small",
- "large",
-]]
+)]
[iree_generated_e2e_runner_test(
name = "e2e_matmul_vmvx_experimental_dt%s_%s_%s" % (
@@ -412,7 +383,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulSimt",
],
tags = [
@@ -440,7 +411,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
@@ -459,14 +430,13 @@
)
iree_generated_e2e_runner_test(
- name = "e2e_matmul_cuda_f32_large_unaligned",
+ name = "e2e_matmul_cuda_f32",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
- "--shapes=gpu_large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
@@ -484,14 +454,13 @@
)
iree_generated_e2e_runner_test(
- name = "e2e_matmul_cuda_f16_large_unaligned",
+ name = "e2e_matmul_cuda_f16",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
- "--shapes=gpu_large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
@@ -517,7 +486,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
@@ -544,7 +513,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
@@ -571,7 +540,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
@@ -590,14 +559,13 @@
)
[iree_generated_e2e_runner_test(
- name = "e2e_matmul_cuda_%s_large_splitk" % lhs_rhs_type,
+ name = "e2e_matmul_cuda_%s_splitk" % lhs_rhs_type,
compiler_flags = [
"--iree-dispatch-creation-split-matmul-reduction=4",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
- "--shapes=large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
@@ -606,8 +574,6 @@
"notsan",
"noubsan",
"requires-gpu-nvidia",
- # "--shapes=large" can cause timeouts on riscv emulator.
- "noriscv",
],
target_backends_and_drivers = [
("cuda", "cuda"),
@@ -632,7 +598,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=SPIRVVectorizeMali",
],
tags = [
@@ -659,7 +625,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=SPIRVVectorizeNVIDIA",
],
tags = [
@@ -685,7 +651,7 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
- "--shapes=gpu_large_aligned",
+ "--shapes=easy_large_static",
"--compilation_info=SPIRVCooperativeMatrixVectorize",
],
runner_args = [
diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt
index 58d0e16..df2c92c 100644
--- a/tests/e2e/matmul/CMakeLists.txt
+++ b/tests/e2e/matmul/CMakeLists.txt
@@ -12,7 +12,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs_peel
+ e2e_matmul_cpu_arm_sme_nondt_f32__transpose_lhs_peel
TEST_TYPE
matmul
GENERATOR
@@ -20,7 +20,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -41,7 +40,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs
+ e2e_matmul_cpu_arm_sme_nondt_f32__transpose_lhs
TEST_TYPE
matmul
GENERATOR
@@ -49,7 +48,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -69,7 +67,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_small_peel
+ e2e_matmul_cpu_arm_sme_nondt_f32__peel
TEST_TYPE
matmul
GENERATOR
@@ -77,7 +75,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -97,7 +94,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_small
+ e2e_matmul_cpu_arm_sme_nondt_f32_
TEST_TYPE
matmul
GENERATOR
@@ -105,7 +102,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -124,119 +120,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs_peel
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling=false"
- "--iree-llvmcpu-enable-scalable-vectorization"
- "--iree-llvmcpu-target-triple=aarch64-unknown-unknown"
- "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)"
- "--iree-llvmcpu-vector-pproc-strategy=peel"
- LABELS
- "requires-arm-sme"
- TARGET_CPU_FEATURES_VARIANTS
- "arm_64:sme:+sve,+sme"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling=false"
- "--iree-llvmcpu-enable-scalable-vectorization"
- "--iree-llvmcpu-target-triple=aarch64-unknown-unknown"
- "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)"
- LABELS
- "requires-arm-sme"
- TARGET_CPU_FEATURES_VARIANTS
- "arm_64:sme:+sve,+sme"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_large_peel
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling=false"
- "--iree-llvmcpu-enable-scalable-vectorization"
- "--iree-llvmcpu-target-triple=aarch64-unknown-unknown"
- "--iree-llvmcpu-vector-pproc-strategy=peel"
- LABELS
- "requires-arm-sme"
- TARGET_CPU_FEATURES_VARIANTS
- "arm_64:sme:+sve,+sme"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_arm_sme_nondt_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling=false"
- "--iree-llvmcpu-enable-scalable-vectorization"
- "--iree-llvmcpu-target-triple=aarch64-unknown-unknown"
- LABELS
- "requires-arm-sme"
- TARGET_CPU_FEATURES_VARIANTS
- "arm_64:sme:+sve,+sme"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_i8_i32_small
+ e2e_matmul_cpu_dt_i8_i32
TEST_TYPE
matmul
GENERATOR
@@ -244,7 +128,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=i8"
"--acc_type=i32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -265,37 +148,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_i8_i32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=i8"
- "--acc_type=i32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "arm_64:dotprod:+dotprod"
- "arm_64:i8mm:+i8mm"
- "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_f32_f32_small
+ e2e_matmul_cpu_dt_f32_f32
TEST_TYPE
matmul
GENERATOR
@@ -303,7 +156,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -323,36 +175,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_f32_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_f16_f16_small
+ e2e_matmul_cpu_dt_f16_f16
TEST_TYPE
matmul
GENERATOR
@@ -360,7 +183,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -382,39 +204,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_f16_f16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fullfp16:+fullfp16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_f16_f32_small
+ e2e_matmul_cpu_dt_f16_f32
TEST_TYPE
matmul
GENERATOR
@@ -422,7 +212,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -444,39 +233,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_f16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fp16fml:+fp16fml"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_bf16_bf16_small
+ e2e_matmul_cpu_dt_bf16_bf16
TEST_TYPE
matmul
GENERATOR
@@ -484,7 +241,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=bf16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -507,40 +263,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_bf16_bf16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=bf16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_bf16_f32_small
+ e2e_matmul_cpu_dt_bf16_f32
TEST_TYPE
matmul
GENERATOR
@@ -548,7 +271,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -571,40 +293,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_bf16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_i8_i32_small
+ e2e_matmul_cpu_dt_uk_i8_i32
TEST_TYPE
matmul
GENERATOR
@@ -612,7 +301,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=i8"
"--acc_type=i32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -633,37 +321,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_i8_i32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=i8"
- "--acc_type=i32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "arm_64:dotprod:+dotprod"
- "arm_64:i8mm:+i8mm"
- "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_f32_f32_small
+ e2e_matmul_cpu_dt_uk_f32_f32
TEST_TYPE
matmul
GENERATOR
@@ -671,7 +329,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -691,36 +348,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_f32_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_f16_f16_small
+ e2e_matmul_cpu_dt_uk_f16_f16
TEST_TYPE
matmul
GENERATOR
@@ -728,7 +356,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -750,39 +377,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_f16_f16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fullfp16:+fullfp16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_f16_f32_small
+ e2e_matmul_cpu_dt_uk_f16_f32
TEST_TYPE
matmul
GENERATOR
@@ -790,7 +385,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -812,39 +406,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_f16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fp16fml:+fp16fml"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_bf16_bf16_small
+ e2e_matmul_cpu_dt_uk_bf16_bf16
TEST_TYPE
matmul
GENERATOR
@@ -852,7 +414,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=bf16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -875,40 +436,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_bf16_bf16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=bf16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_dt_uk_bf16_f32_small
+ e2e_matmul_cpu_dt_uk_bf16_f32
TEST_TYPE
matmul
GENERATOR
@@ -916,7 +444,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -939,40 +466,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_dt_uk_bf16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_i8_i32_small
+ e2e_matmul_cpu_experimental_dt_i8_i32
TEST_TYPE
matmul
GENERATOR
@@ -980,7 +474,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=i8"
"--acc_type=i32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1002,38 +495,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_i8_i32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=i8"
- "--acc_type=i32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "arm_64:dotprod:+dotprod"
- "arm_64:i8mm:+i8mm"
- "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_f32_f32_small
+ e2e_matmul_cpu_experimental_dt_f32_f32
TEST_TYPE
matmul
GENERATOR
@@ -1041,7 +503,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1062,37 +523,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_f32_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_f16_f16_small
+ e2e_matmul_cpu_experimental_dt_f16_f16
TEST_TYPE
matmul
GENERATOR
@@ -1100,7 +531,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1123,40 +553,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_f16_f16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fullfp16:+fullfp16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_f16_f32_small
+ e2e_matmul_cpu_experimental_dt_f16_f32
TEST_TYPE
matmul
GENERATOR
@@ -1164,7 +561,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1187,40 +583,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_f16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fp16fml:+fp16fml"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_bf16_bf16_small
+ e2e_matmul_cpu_experimental_dt_bf16_bf16
TEST_TYPE
matmul
GENERATOR
@@ -1228,7 +591,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=bf16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1252,41 +614,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_bf16_bf16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=bf16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_bf16_f32_small
+ e2e_matmul_cpu_experimental_dt_bf16_f32
TEST_TYPE
matmul
GENERATOR
@@ -1294,7 +622,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1318,41 +645,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_bf16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=none"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_i8_i32_small
+ e2e_matmul_cpu_experimental_dt_uk_i8_i32
TEST_TYPE
matmul
GENERATOR
@@ -1360,7 +653,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=i8"
"--acc_type=i32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1382,38 +674,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_i8_i32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=i8"
- "--acc_type=i32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "arm_64:dotprod:+dotprod"
- "arm_64:i8mm:+i8mm"
- "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_f32_f32_small
+ e2e_matmul_cpu_experimental_dt_uk_f32_f32
TEST_TYPE
matmul
GENERATOR
@@ -1421,7 +682,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1442,37 +702,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_f32_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f32"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_f16_f16_small
+ e2e_matmul_cpu_experimental_dt_uk_f16_f16
TEST_TYPE
matmul
GENERATOR
@@ -1480,7 +710,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1503,40 +732,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_f16_f16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fullfp16:+fullfp16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_f16_f32_small
+ e2e_matmul_cpu_experimental_dt_uk_f16_f32
TEST_TYPE
matmul
GENERATOR
@@ -1544,7 +740,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1567,40 +762,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_f16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=f16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "arm_64:fp16fml:+fp16fml"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_bf16_bf16_small
+ e2e_matmul_cpu_experimental_dt_uk_bf16_bf16
TEST_TYPE
matmul
GENERATOR
@@ -1608,7 +770,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=bf16"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1632,41 +793,7 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_bf16_bf16_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=bf16"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
- e2e_matmul_cpu_experimental_dt_uk_bf16_f32_small
+ e2e_matmul_cpu_experimental_dt_uk_bf16_f32
TEST_TYPE
matmul
GENERATOR
@@ -1674,7 +801,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=bf16"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1698,40 +824,6 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cpu_experimental_dt_uk_bf16_f32_large
- TEST_TYPE
- matmul
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=bf16"
- "--acc_type=f32"
- "--shapes=large"
- TEST_RUNNER
- iree_tools_testing_e2e_iree-e2e-matmul-test
- TARGET_BACKENDS
- "llvm-cpu"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-opt-data-tiling"
- "--iree-global-opt-enable-early-materialization=false"
- "--iree-llvmcpu-enable-ukernels=all"
- LABELS
- "noasan"
- "notsan"
- "noriscv"
- "nowasm"
- TARGET_CPU_FEATURES_VARIANTS
- "default"
- "x86_64:avx2:+avx,+avx2,+fma,+f16c"
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16"
- "arm_64:bf16:+bf16"
-)
-
-iree_generated_e2e_runner_test(
- NAME
e2e_matmul_vmvx_experimental_dt_f32_f32
TEST_TYPE
matmul
@@ -1877,7 +969,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUMatmulSimt"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -1902,7 +994,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUMatmulTensorCore"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -1922,14 +1014,13 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cuda_f32_large_unaligned
+ e2e_matmul_cuda_f32
TEST_TYPE
matmul
GENERATOR
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1948,14 +1039,13 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cuda_f16_large_unaligned
+ e2e_matmul_cuda_f16
TEST_TYPE
matmul
GENERATOR
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -1981,7 +1071,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2008,7 +1098,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUMatmulTensorCore"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2035,7 +1125,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2055,14 +1145,13 @@
iree_generated_e2e_runner_test(
NAME
- e2e_matmul_cuda_f32_large_splitk
+ e2e_matmul_cuda_f32_splitk
TEST_TYPE
matmul
GENERATOR
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2077,7 +1166,6 @@
"notsan"
"noubsan"
"requires-gpu-nvidia"
- "noriscv"
)
iree_generated_e2e_runner_test(
@@ -2089,7 +1177,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=i8"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeMali"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2113,7 +1201,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeMali"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2137,7 +1225,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeMali"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2161,7 +1249,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=i8"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeNVIDIA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2185,7 +1273,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeNVIDIA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2209,7 +1297,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVVectorizeNVIDIA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2233,7 +1321,7 @@
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f16"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=SPIRVCooperativeMatrixVectorize"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2271,7 +1359,7 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUVectorDistributeMFMA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2299,7 +1387,7 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUVectorDistributeMFMA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2328,7 +1416,7 @@
"--lhs_rhs_type=f16"
"--acc_type=f32"
"--transpose_rhs"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUVectorDistributeMFMA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2361,7 +1449,7 @@
GENERATOR_ARGS
"--lhs_rhs_type=f8E4M3FNUZ"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUVectorDistributeMFMA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2390,7 +1478,7 @@
"--lhs_rhs_type=i8"
"--acc_type=i32"
"--transpose_rhs"
- "--shapes=gpu_large_aligned"
+ "--shapes=easy_large_static"
"--compilation_info=LLVMGPUVectorDistributeMFMA"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
@@ -2418,7 +1506,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2447,7 +1534,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2477,7 +1563,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=i8"
"--acc_type=i32"
- "--shapes=gpu_large_aligned"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2507,7 +1592,6 @@
GENERATOR_ARGS
"--lhs_rhs_type=f32"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2546,8 +1630,8 @@
GENERATOR_ARGS
"--lhs_rhs_type=f16"
"--acc_type=f32"
- "--shapes=gpu_large_aligned"
"--compilation_info=LLVMGPUVectorDistributeWMMA"
+ "--shapes=easy_large_static"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2578,8 +1662,8 @@
"--lhs_rhs_type=f16"
"--acc_type=f32"
"--transpose_rhs"
- "--shapes=gpu_large_aligned"
"--compilation_info=LLVMGPUVectorDistributeWMMA"
+ "--shapes=easy_large_static"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
@@ -2610,8 +1694,8 @@
"--lhs_rhs_type=i8"
"--acc_type=i32"
"--transpose_rhs"
- "--shapes=gpu_large_aligned"
"--compilation_info=LLVMGPUVectorDistributeWMMA"
+ "--shapes=easy_large_static"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-matmul-test
TARGET_BACKENDS
diff --git a/tests/e2e/matmul/generate_e2e_matmul_tests.py b/tests/e2e/matmul/generate_e2e_matmul_tests.py
index c6a2994..30d210d 100644
--- a/tests/e2e/matmul/generate_e2e_matmul_tests.py
+++ b/tests/e2e/matmul/generate_e2e_matmul_tests.py
@@ -35,10 +35,10 @@
# The values are the accepted values for the --shapes= flag.
@enum.unique
class ShapesId(enum.Enum):
+ DEFAULT = "default"
SMALL = "small"
LARGE = "large"
- GPU_LARGE = "gpu_large"
- GPU_LARGE_ALIGNED = "gpu_large_aligned"
+ EASY_LARGE_STATIC = "easy_large_static"
# Enumerates of the collections of compilation info that we can generate tests
@@ -192,6 +192,8 @@
# 2. Some shapes are commented out: they used to be tested but have been
# disabled to improve the trade-off between test coverage and build
# latency.
+ if shapes_id == ShapesId.DEFAULT:
+ return get_test_shapes(ShapesId.SMALL) + get_test_shapes(ShapesId.LARGE)
if shapes_id == ShapesId.SMALL:
return [
# square matrices. Start by the simplest case of 1x1x1.
@@ -223,9 +225,16 @@
]
if shapes_id == ShapesId.LARGE:
return [
- # some random large sizes
- TestShape(m=123, k=456, n=789, accumulate=True),
- TestShape(m=654, k=321, n=234, accumulate=False),
+ # unaligned cases.
+ TestShape(m=457, k=330, n=512, accumulate=False),
+ TestShape(m=438, k=331, n=513, accumulate=False),
+ TestShape(m=540, k=332, n=516, accumulate=False),
+ TestShape(m=1000, k=4, n=512, accumulate=False),
+ TestShape(m=4, k=1000, n=512, accumulate=False),
+ TestShape(m=512, k=1000, n=4, accumulate=False),
+ TestShape(m=513, k=128, n=55, accumulate=False),
+ TestShape(m=7, k=160, n=31, accumulate=False),
+ TestShape(m=512, k=330, n=33, accumulate=False),
# shapes involving vectors (i.e. most rectangular cases)
TestShape(m=1, k=1000, n=1000, accumulate=True), # large vector*matrix
TestShape(m=1000, k=1000, n=1, accumulate=True), # large matrix*vector
@@ -236,25 +245,11 @@
# running on fewer backends/drivers or with fewer generators
# (see get_test_generators).
]
- if shapes_id == ShapesId.GPU_LARGE_ALIGNED:
+ if shapes_id == ShapesId.EASY_LARGE_STATIC:
return [
TestShape(m=512, k=128, n=512, accumulate=True),
TestShape(m=512, k=128, n=512, accumulate=False),
]
- if shapes_id == ShapesId.GPU_LARGE:
- return [
- # unaligned cases.
- TestShape(m=457, k=330, n=512, accumulate=False),
- TestShape(m=457, k=330, n=514, accumulate=False),
- TestShape(m=438, k=330, n=514, accumulate=False),
- TestShape(m=540, k=332, n=516, accumulate=False),
- TestShape(m=1000, k=4, n=512, accumulate=False),
- TestShape(m=4, k=1000, n=512, accumulate=False),
- TestShape(m=512, k=1000, n=4, accumulate=False),
- TestShape(m=512, k=128, n=500, accumulate=False),
- TestShape(m=457, k=160, n=512, accumulate=False),
- TestShape(m=512, k=330, n=512, accumulate=False),
- ]
raise ValueError(shapes_id)
@@ -262,7 +257,7 @@
# Returns the list of Dynamicity's to use for the collection of shapes
# identified by shapes_id.
def get_dynamicities(shapes_id: ShapesId):
- if shapes_id == ShapesId.GPU_LARGE or shapes_id == ShapesId.GPU_LARGE_ALIGNED:
+ if shapes_id == ShapesId.EASY_LARGE_STATIC:
return [
Dynamicity.STATIC,
]
@@ -927,7 +922,8 @@
type=str,
choices=[s.value for s in ShapesId],
help="Collection of matrix shapes to test",
- required=True,
+ default="default",
+ required=False,
)
parser.add_argument(
"--transpose_rhs",
diff --git a/tools/testing/e2e/test_utils.c b/tools/testing/e2e/test_utils.c
index 2981148..a7119dc 100644
--- a/tools/testing/e2e/test_utils.c
+++ b/tools/testing/e2e/test_utils.c
@@ -198,12 +198,23 @@
return fabsf(iree_math_f16_to_f32(actual.f16_u16) -
iree_math_f16_to_f32(expected.f16_u16)) <
acceptable_fp_delta;
- case IREE_TEST_UTILS_VALUE_TYPE_BF16:
+ case IREE_TEST_UTILS_VALUE_TYPE_BF16: {
if (actual.bf16_u16 == expected.bf16_u16) return true;
+ // This is the rare case where the accumulator itself (not just LHS/RHS)
+ // is bf16. This doesn't really happen in practice and is mostly just in
+ // some CPU tests for completeness. Accumulators grow outside of the
+ // narrow range of bf16 exact representation of integers, forcing fuzzy
+ // compares.
+ float actual_f32 = iree_math_bf16_to_f32(actual.bf16_u16);
+ float expected_f32 = iree_math_bf16_to_f32(expected.bf16_u16);
+ if (fabsf(actual_f32) > 127.0f || fabsf(expected_f32) > 127.0f) {
+ if (fabsf(actual_f32 - expected_f32) < 10.0f) {
+ return true;
+ }
+ }
if (iree_test_utils_require_exact_results()) return false;
- return fabsf(iree_math_bf16_to_f32(actual.bf16_u16) -
- iree_math_bf16_to_f32(expected.bf16_u16)) <
- acceptable_fp_delta;
+ return fabsf(actual_f32 - expected_f32) < acceptable_fp_delta;
+ }
case IREE_TEST_UTILS_VALUE_TYPE_F32:
if (actual.f32 == expected.f32) return true;
if (iree_test_utils_require_exact_results()) return false;