Improvements to target CPU features variants in e2e tests (#13915)
* CPU feature variants now have a short mnemonic. Test names use it, so
they stay short regardless of the number of CPU features enabled. (Test
names were generated by joining all CPU feature strings, which was too
much with AVX-512 consisting of several sub-features such as
`+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni`).
* The CMake helper functions `process_target_cpu_features` is much
simplified, and renamed `parse_target_cpu_features_variant`. It's also
made more correct: it was suggesting that it was taking names of
output-variables as arguments, but was actually ignoring them and
writing to fixed variables. Now it's honoring its output-var arguments.
* Some recently added tests used the `"large"` matmul shapes set, but
weren't tagged `noriscv` and `notsan`, and as a result, were hitting >
10s latencies, a potential timeout risk. Fixed.
diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake
index a70a3cc..2365428 100644
--- a/build_tools/cmake/iree_check_test.cmake
+++ b/build_tools/cmake/iree_check_test.cmake
@@ -114,7 +114,6 @@
${_RULE_RUNNER_ARGS}
LABELS
${_RULE_LABELS}
- ${_RULE_TARGET_CPU_FEATURES}
TIMEOUT
${_RULE_TIMEOUT}
)
@@ -232,71 +231,52 @@
# Helper function parsing a string occurring as an entry in TARGET_CPU_FEATURES_VARIANTS.
#
# This function has 3 output-params: variables that it sets with PARENT_SCOPE:
-# _ENABLED, _TARGET_CPU_FEATURES, _TARGET_CPU_FEATURES_SUFFIX.
+# _ENABLED, _FEATURES_NAME, _FEATURES.
#
# "default" is handled specially. _ENABLED is always set to "TRUE" and
-# _TARGET_CPU_FEATURES, and _TARGET_CPU_FEATURES_SUFFIX are set to
+# _FEATURES_NAME and _FEATURES are set to
# the empty string.
#
-# Other values are parsed as "arch:features", the parsed arch is matched with
-# `IREE_ARCH`, `_ENABLED` is set to "TRUE" if and only if they
-# match, `_TARGET_CPU_FEATURES_SUFFIX` is set to a string based on the
-# features that is appropriate to include in a CMake target or test name, and
-# More than one target cpu feature is currently unsupported.
+# Other values are parsed as "arch:features_name:features". The `arch`
+# component is matched with `IREE_ARCH`, `_ENABLED` is set to "TRUE" if and
+# only if they match. In that case:
+# `_FEATURES_NAME` is set to `features_name`.
+# `_FEATURES` is set to `features`.
#
-# aarch64:+dotprod ->_ENABLED="TRUE" if the target architecture is aarch64,
-# _TARGET_CPU_FEATURES="+dotprod",
-# _TARGET_CPU_FEATURES_SUFFIX="_dotprod",
-# default -> _ENABLED="TRUE" unconditionally,
-# other output strings are "".
-function(process_target_cpu_features _INPUT_TARGET_CPU_FEATURES _ENABLED
- _TARGET_CPU_FEATURES _TARGET_CPU_FEATURES_SUFFIX)
- set(_TARGET_CPU_FEATURES "" PARENT_SCOPE)
- set(_TARGET_CPU_FEATURES_SUFFIX "" PARENT_SCOPE)
- if("${_INPUT_TARGET_CPU_FEATURES}" STREQUAL "default")
- set(_ENABLED "TRUE" PARENT_SCOPE)
+# Examples:
+#
+# default:
+# _ENABLED="TRUE" unconditionally,
+# other output strings are "".
+#
+# aarch64:dotprod:+dotprod:
+# _ENABLED="TRUE" if the target architecture is aarch64, and in that case:
+# _FEATURES_NAME="dotprod".
+# _FEATURES="+dotprod".
+function(parse_target_cpu_features_variant _VARIANT_STRING _ENABLED_VAR
+ _FEATURES_NAME_VAR _FEATURES_VAR)
+ set("${_ENABLED_VAR}" FALSE PARENT_SCOPE)
+ set("${_FEATURES_NAME_VAR}" "" PARENT_SCOPE)
+ set("${_FEATURES_VAR}" "" PARENT_SCOPE)
+ if("${_VARIANT_STRING}" STREQUAL "default")
+ set("{_ENABLED_VAR}" TRUE PARENT_SCOPE)
return()
endif()
- string(REGEX MATCHALL "[^:]+" _COMPONENTS "${_INPUT_TARGET_CPU_FEATURES}")
+ # Interpret _VARIANT_STRING as a CMake list (;-separated).
+ string(REPLACE ":" ";" _COMPONENTS "${_VARIANT_STRING}")
list(LENGTH _COMPONENTS _NUM_COMPONENTS)
- if(NOT _NUM_COMPONENTS EQUAL 2)
- message(SEND_ERROR "TARGET_CPU_FEATURES should be of the form \
-_FILTER_ARCH:_TARGET_CPU_FEATURES. Got: ${_INPUT_TARGET_CPU_FEATURES}")
+ if(NOT _NUM_COMPONENTS EQUAL 3)
+ message(SEND_ERROR "TARGET_CPU_FEATURES_VARIANTS should be of the form \
+ \"arch:features_name:features\". Got: \"${_VARIANT_STRING}\"")
return()
endif()
- # TARGET_CPU_FEATURES_VARIANT is of the form _FILTER_ARCH:_TARGET_CPU_FEATURE.
list(GET _COMPONENTS 0 _FILTER_ARCH)
- list(GET _COMPONENTS 1 _TARGET_CPU_FEATURES)
+ list(GET _COMPONENTS 1 _FEATURES_NAME)
+ list(GET _COMPONENTS 2 _FEATURES)
if(_FILTER_ARCH STREQUAL IREE_ARCH)
- set(_ENABLED "TRUE" PARENT_SCOPE)
- set(_TARGET_CPU_FEATURES "${_TARGET_CPU_FEATURES}" PARENT_SCOPE)
- # TODO: the logic to generate the suffix from the list of target CPU features
- # will need to be generalized when the lists have more than 1 element, when
- # some features are being disabled by a "-" sign, and if some features involve
- # any character that's not wanted in a cmake rule name.
- # For now, let's just generate errors in those cases:
- list(LENGTH _TARGET_CPU_FEATURES _NUM_TARGET_CPU_FEATURES)
- if(NOT _NUM_TARGET_CPU_FEATURES EQUAL 1)
- message(SEND_ERROR "Current limitation: \
-TARGET_CPU_FEATURES should have length 1")
- endif()
- string(SUBSTRING "${_TARGET_CPU_FEATURES}" 0 1 _TARGET_CPU_FEATURES_FIRST_CHAR)
- string(SUBSTRING "${_TARGET_CPU_FEATURES}" 1 -1 _TARGET_CPU_FEATURES_AFTER_FIRST_CHAR)
- if(NOT _TARGET_CPU_FEATURES_FIRST_CHAR STREQUAL "+")
- message(SEND_ERROR "Current limitation: \
-TARGET_CPU_FEATURES should start with a +. Got: ${_TARGET_CPU_FEATURES}.")
- endif()
- if(NOT _TARGET_CPU_FEATURES_AFTER_FIRST_CHAR MATCHES "[a-zA-Z0-9_]+")
- message(SEND_ERROR "Current limitation: \
-TARGET_CPU_FEATURES should match [a-zA-Z0-9]+ after the initial +. \
-Got: ${_TARGET_CPU_FEATURES}.")
- endif()
- # Generate the target cpu features suffix string with underscores ('_')
- # separating the features.
- string(REGEX REPLACE "[+,]+" "_" _TARGET_CPU_FEATURES_SUFFIX_LOCAL "${_TARGET_CPU_FEATURES}")
- set(_TARGET_CPU_FEATURES_SUFFIX "${_TARGET_CPU_FEATURES_SUFFIX_LOCAL}" PARENT_SCOPE)
- else()
- set(_ENABLED "FALSE" PARENT_SCOPE)
+ set("${_ENABLED_VAR}" TRUE PARENT_SCOPE)
+ set("${_FEATURES_NAME_VAR}" "${_FEATURES_NAME}" PARENT_SCOPE)
+ set("${_FEATURES_VAR}" "${_FEATURES}" PARENT_SCOPE)
endif()
endfunction()
@@ -364,12 +344,19 @@
else()
set(_TARGET_CPU_FEATURES_VARIANTS "default")
endif()
- foreach(_TARGET_CPU_FEATURES_LIST_ELEM IN LISTS _TARGET_CPU_FEATURES_VARIANTS)
- process_target_cpu_features("${_TARGET_CPU_FEATURES_LIST_ELEM}" _ENABLED _TARGET_CPU_FEATURES _TARGET_CPU_FEATURES_SUFFIX)
+ foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS)
+ parse_target_cpu_features_variant("${_VARIANT_STRING}"
+ _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES)
if(NOT _ENABLED)
# The current entry is disabled on the target CPU architecture.
continue()
endif()
+ set(_TARGET_CPU_FEATURES_SUFFIX "")
+ set(_LABELS "${_RULE_LABELS}")
+ if (_TARGET_CPU_FEATURES_NAME)
+ set(_TARGET_CPU_FEATURES_SUFFIX "_${_TARGET_CPU_FEATURES_NAME}")
+ list(APPEND _LABELS "cpu_features=${_TARGET_CPU_FEATURES_NAME}")
+ endif()
iree_check_single_backend_test_suite(
NAME
"${_RULE_NAME}_${_TARGET_BACKEND}_${_DRIVER}${_TARGET_CPU_FEATURES_SUFFIX}"
@@ -384,7 +371,7 @@
RUNNER_ARGS
${_RULE_RUNNER_ARGS}
LABELS
- ${_RULE_LABELS}
+ ${_LABELS}
TARGET_CPU_FEATURES
${_TARGET_CPU_FEATURES}
TIMEOUT
diff --git a/build_tools/cmake/iree_trace_runner_test.cmake b/build_tools/cmake/iree_trace_runner_test.cmake
index 2ea8eb0..4c6a00d 100644
--- a/build_tools/cmake/iree_trace_runner_test.cmake
+++ b/build_tools/cmake/iree_trace_runner_test.cmake
@@ -97,7 +97,6 @@
${_RULE_RUNNER_ARGS}
LABELS
${_RULE_LABELS}
- ${_RULE_TARGET_CPU_FEATURES}
)
endfunction()
@@ -320,12 +319,19 @@
else()
set(_TARGET_CPU_FEATURES_VARIANTS "default")
endif()
- foreach(_TARGET_CPU_FEATURES_LIST_ELEM IN LISTS _TARGET_CPU_FEATURES_VARIANTS)
- process_target_cpu_features("${_TARGET_CPU_FEATURES_LIST_ELEM}" _ENABLED _TARGET_CPU_FEATURES _TARGET_CPU_FEATURES_SUFFIX)
+ foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS)
+ parse_target_cpu_features_variant("${_VARIANT_STRING}"
+ _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES)
if(NOT _ENABLED)
# The current entry is disabled on the target CPU architecture.
continue()
endif()
+ set(_TARGET_CPU_FEATURES_SUFFIX "")
+ set(_LABELS "${_RULE_LABELS}")
+ if (_TARGET_CPU_FEATURES_NAME)
+ set(_TARGET_CPU_FEATURES_SUFFIX "_${_TARGET_CPU_FEATURES_NAME}")
+ list(APPEND _LABELS "cpu_features=${_TARGET_CPU_FEATURES_NAME}")
+ endif()
iree_single_backend_generated_trace_runner_test(
NAME
"${_RULE_NAME}_${_TARGET_BACKEND}_${_DRIVER}${_TARGET_CPU_FEATURES_SUFFIX}"
@@ -344,7 +350,7 @@
RUNNER_ARGS
${_RULE_RUNNER_ARGS}
LABELS
- ${_RULE_LABELS}
+ ${_LABELS}
TARGET_CPU_FEATURES
${_TARGET_CPU_FEATURES}
)
diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel
index 9381459..6b3965f 100644
--- a/tests/e2e/matmul/BUILD.bazel
+++ b/tests/e2e/matmul/BUILD.bazel
@@ -50,8 +50,8 @@
],
target_cpu_features_variants = ["default"] +
([
- "arm_64:+dotprod",
- "arm_64:+i8mm",
+ "arm_64:dotprod:+dotprod",
+ "arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
@@ -70,16 +70,17 @@
"--shapes=large",
],
tags = [
- # "--shapes=large" can cause timeouts on riscv emulator.
+ # "--shapes=large" can cause timeouts on riscv emulator and TSan.
"noriscv",
+ "notsan",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["default"] +
([
- "arm_64:+dotprod",
- "arm_64:+i8mm",
+ "arm_64:dotprod:+dotprod",
+ "arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
@@ -106,8 +107,8 @@
],
target_cpu_features_variants = ["default"] +
([
- "arm_64:+dotprod",
- "arm_64:+i8mm",
+ "arm_64:dotprod:+dotprod",
+ "arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
@@ -189,17 +190,22 @@
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=%s" % size,
],
+ tags = [
+ # "--shapes=large" can cause timeouts on riscv emulator and TSan.
+ "noriscv",
+ "notsan",
+ ] if size == "large" else [],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = [
"default",
- "x86_64:" + ",".join(X86_64_AVX2_FMA),
- "x86_64:" + ",".join(X86_64_AVX512_BASE),
+ "x86_64:avx2_fma:" + ",".join(X86_64_AVX2_FMA),
+ "x86_64:avx512_base:" + ",".join(X86_64_AVX512_BASE),
] + ([
- "x86_64:" + ",".join(X86_64_AVX512_VNNI),
- "arm_64:+dotprod",
- "arm_64:+i8mm",
+ "x86_64:avx512_vnni:" + ",".join(X86_64_AVX512_VNNI),
+ "arm_64:dotprod:+dotprod",
+ "arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt
index f6bb508..4784b8e 100644
--- a/tests/e2e/matmul/CMakeLists.txt
+++ b/tests/e2e/matmul/CMakeLists.txt
@@ -60,8 +60,8 @@
"--iree-flow-enable-data-tiling"
TARGET_CPU_FEATURES_VARIANTS
"default"
- "arm_64:+dotprod"
- "arm_64:+i8mm"
+ "arm_64:dotprod:+dotprod"
+ "arm_64:i8mm:+i8mm"
)
iree_generated_trace_runner_test(
@@ -102,10 +102,11 @@
"--iree-flow-enable-data-tiling"
LABELS
"noriscv"
+ "notsan"
TARGET_CPU_FEATURES_VARIANTS
"default"
- "arm_64:+dotprod"
- "arm_64:+i8mm"
+ "arm_64:dotprod:+dotprod"
+ "arm_64:i8mm:+i8mm"
)
iree_generated_trace_runner_test(
@@ -126,6 +127,7 @@
"--iree-flow-enable-data-tiling"
LABELS
"noriscv"
+ "notsan"
TARGET_CPU_FEATURES_VARIANTS
"default"
)
@@ -149,8 +151,8 @@
"--iree-flow-enable-data-tiling"
TARGET_CPU_FEATURES_VARIANTS
"default"
- "arm_64:+dotprod"
- "arm_64:+i8mm"
+ "arm_64:dotprod:+dotprod"
+ "arm_64:i8mm:+i8mm"
)
iree_generated_trace_runner_test(
@@ -265,13 +267,15 @@
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
+ LABELS
+
TARGET_CPU_FEATURES_VARIANTS
"default"
- "x86_64:+avx,+avx2,+fma"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
- "arm_64:+dotprod"
- "arm_64:+i8mm"
+ "x86_64:avx2_fma:+avx,+avx2,+fma"
+ "x86_64:avx512_base:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:avx512_vnni:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
+ "arm_64:dotprod:+dotprod"
+ "arm_64:i8mm:+i8mm"
)
iree_generated_trace_runner_test(
@@ -291,13 +295,16 @@
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
+ LABELS
+ "noriscv"
+ "notsan"
TARGET_CPU_FEATURES_VARIANTS
"default"
- "x86_64:+avx,+avx2,+fma"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
- "arm_64:+dotprod"
- "arm_64:+i8mm"
+ "x86_64:avx2_fma:+avx,+avx2,+fma"
+ "x86_64:avx512_base:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:avx512_vnni:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
+ "arm_64:dotprod:+dotprod"
+ "arm_64:i8mm:+i8mm"
)
iree_generated_trace_runner_test(
@@ -317,10 +324,12 @@
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
+ LABELS
+
TARGET_CPU_FEATURES_VARIANTS
"default"
- "x86_64:+avx,+avx2,+fma"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:avx2_fma:+avx,+avx2,+fma"
+ "x86_64:avx512_base:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
)
iree_generated_trace_runner_test(
@@ -340,10 +349,13 @@
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
+ LABELS
+ "noriscv"
+ "notsan"
TARGET_CPU_FEATURES_VARIANTS
"default"
- "x86_64:+avx,+avx2,+fma"
- "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:avx2_fma:+avx,+avx2,+fma"
+ "x86_64:avx512_base:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
)
iree_generated_trace_runner_test(