Centralize CPU features, fix `iree_cpu_lookup_data_by_key`, add an `iree-cpuinfo` tool (#12498)
The starting point of this PR was that I realized that I had forgotten
to update `iree_cpu_lookup_data_by_key`.
That sent me looking for a way to have it automatically sync'd with the
rest of the code, resulting in the present `.inl` approach.
As a bonus, I added an `iree-cpuinfo` tool, also automatically sync'd
with the `.inl` and just a few lines of impl. This is a quick debugging
tool that answers the question "which CPU features are we actually
detecting on this machine?". Obviously the point is not to provide
the best CPU feature detection tool; the point is that it is, by
construction, bug-for-bug equivalent to IREE's actual CPU feature
detection.
diff --git a/runtime/src/iree/base/internal/cpu.c b/runtime/src/iree/base/internal/cpu.c
index 3bc25f1..66f784c 100644
--- a/runtime/src/iree/base/internal/cpu.c
+++ b/runtime/src/iree/base/internal/cpu.c
@@ -221,39 +221,6 @@
}
//===----------------------------------------------------------------------===//
-// Architecture-specific string lookup
-//===----------------------------------------------------------------------===//
-
-#define IREE_TEST_FIELD_BIT(field_key, field_value, bit_value) \
- if (iree_string_view_equal(key, IREE_SV(field_key))) { \
- *out_value = iree_all_bits_set((field_value), (bit_value)) ? 1 : 0; \
- return true; \
- }
-
-#if defined(IREE_ARCH_ARM_64)
-
-static bool iree_cpu_lookup_data_by_key_for_arch(
- const uint64_t* fields, iree_string_view_t key,
- int64_t* IREE_RESTRICT out_value) {
- IREE_TEST_FIELD_BIT("dotprod", fields[0], IREE_CPU_DATA0_ARM_64_DOTPROD);
- IREE_TEST_FIELD_BIT("i8mm", fields[0], IREE_CPU_DATA0_ARM_64_I8MM);
- return false;
-}
-
-#else
-
-static bool iree_cpu_lookup_data_by_key_for_arch(
- const uint64_t* fields, iree_string_view_t key,
- int64_t* IREE_RESTRICT out_value) {
- // Not yet implemented for this architecture.
- return false;
-}
-
-#endif // IREE_ARCH_*
-
-#undef IREE_TEST_FIELD_BIT
-
-//===----------------------------------------------------------------------===//
// Processor data query
//===----------------------------------------------------------------------===//
@@ -289,15 +256,30 @@
sizeof(*out_fields));
}
+//===----------------------------------------------------------------------===//
+// Processor data lookup by key
+//===----------------------------------------------------------------------===//
+
iree_status_t iree_cpu_lookup_data_by_key(iree_string_view_t key,
int64_t* IREE_RESTRICT out_value) {
- if (!iree_cpu_lookup_data_by_key_for_arch(iree_cpu_data_cache_, key,
- out_value)) {
- return iree_make_status(IREE_STATUS_NOT_FOUND,
- "CPU data key '%.*s' not found", (int)key.size,
- key.data);
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+ if (IREE_ARCH_ENUM == IREE_ARCH_ENUM_##arch) { \
+ if (iree_string_view_equal(key, IREE_SV(llvm_name))) { \
+ *out_value = iree_all_bits_set( \
+ (iree_cpu_data_cache_[field_index]), \
+ IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name)) \
+ ? 1 \
+ : 0; \
+ return iree_ok_status(); \
+ } \
}
- return iree_ok_status();
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
+
+ return iree_make_status(
+ IREE_STATUS_NOT_FOUND,
+ "CPU feature '%.*s' unknown on this architecture (%s)", (int)key.size,
+ key.data, IREE_ARCH);
}
//===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/base/target_platform.h b/runtime/src/iree/base/target_platform.h
index 1abd987..376fb27 100644
--- a/runtime/src/iree/base/target_platform.h
+++ b/runtime/src/iree/base/target_platform.h
@@ -11,6 +11,7 @@
// one platform+architecture pair for that platform.
//
// IREE_ARCH ("arm_32", "arm_64", etc)
+// IREE_ARCH_ENUM (IREE_ARCH_ENUM_ARM_32, etc)
// IREE_ARCH_ARM_32
// IREE_ARCH_ARM_64
// IREE_ARCH_RISCV_32
@@ -53,41 +54,60 @@
// IREE_ARCH_*
//==============================================================================
+enum iree_arch_enum_e {
+ IREE_ARCH_ENUM_ARM_32,
+ IREE_ARCH_ENUM_ARM_64,
+ IREE_ARCH_ENUM_RISCV_32,
+ IREE_ARCH_ENUM_RISCV_64,
+ IREE_ARCH_ENUM_WASM_32,
+ IREE_ARCH_ENUM_WASM_64,
+ IREE_ARCH_ENUM_X86_32,
+ IREE_ARCH_ENUM_X86_64,
+};
+
#if defined(__arm__) || defined(__arm64) || defined(__aarch64__) || \
defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
defined(__TARGET_ARCH_THUMB) || defined(_M_ARM)
#if defined(__arm64) || defined(__aarch64__)
#define IREE_ARCH "arm_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_ARM_64
#define IREE_ARCH_ARM_64 1
#else
#define IREE_ARCH "arm_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_ARM_32
#define IREE_ARCH_ARM_32 1
#endif // __arm64
#endif // ARM
#if defined(__riscv) && (__riscv_xlen == 32)
#define IREE_ARCH "riscv_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_RISCV_32
#define IREE_ARCH_RISCV_32 1
#elif defined(__riscv) && (__riscv_xlen == 64)
#define IREE_ARCH "riscv_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_RISCV_64
#define IREE_ARCH_RISCV_64 1
#endif // RISCV
#if defined(__wasm32__)
#define IREE_ARCH "wasm_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_WASM_32
#define IREE_ARCH_WASM_32 1
#elif defined(__wasm64__)
#define IREE_ARCH "wasm_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_WASM_64
#define IREE_ARCH_WASM_64 1
#endif // WASM
#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
defined(__i686__) || defined(__i386) || defined(_M_IX86) || defined(_X86_)
#define IREE_ARCH "x86_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_X86_32
#define IREE_ARCH_X86_32 1
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \
defined(__amd64) || defined(_M_X64)
#define IREE_ARCH "x86_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_X86_64
#define IREE_ARCH_X86_64 1
#endif // X86
diff --git a/runtime/src/iree/schemas/BUILD b/runtime/src/iree/schemas/BUILD
index 6f4464a..33af0e2 100644
--- a/runtime/src/iree/schemas/BUILD
+++ b/runtime/src/iree/schemas/BUILD
@@ -70,5 +70,6 @@
name = "cpu_data",
hdrs = [
"cpu_data.h",
+ "cpu_feature_bits.inl",
],
)
diff --git a/runtime/src/iree/schemas/CMakeLists.txt b/runtime/src/iree/schemas/CMakeLists.txt
index 78a8e9a..5e96962 100644
--- a/runtime/src/iree/schemas/CMakeLists.txt
+++ b/runtime/src/iree/schemas/CMakeLists.txt
@@ -93,6 +93,7 @@
cpu_data
HDRS
"cpu_data.h"
+ "cpu_feature_bits.inl"
DEPS
PUBLIC
diff --git a/runtime/src/iree/schemas/cpu_data.h b/runtime/src/iree/schemas/cpu_data.h
index 50a4ab3..46df305 100644
--- a/runtime/src/iree/schemas/cpu_data.h
+++ b/runtime/src/iree/schemas/cpu_data.h
@@ -60,58 +60,16 @@
// in the future.
#define IREE_CPU_DATA_FIELD_COUNT 8
+#define IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name) \
+ IREE_CPU_DATA##field_index##_##arch##_##bit_name
+
// Bitmasks and values for processor data field 0.
enum iree_cpu_data_field_0_e {
- //===--------------------------------------------------------------------===//
- // IREE_ARCH_ARM_64 / aarch64
- //===--------------------------------------------------------------------===//
-
- // TODO: add several common ARM ISA extensions and allocate some ranges of
- // bits for some families/eras. If we just start out with bits 0 and 1
- // allocated for dotprod and i8mm, we are quickly going to have a hard-to-read
- // enumeration here.
- IREE_CPU_DATA0_ARM_64_DOTPROD = 1ull << 0,
- IREE_CPU_DATA0_ARM_64_I8MM = 1ull << 1,
-
- //===--------------------------------------------------------------------===//
- // IREE_ARCH_X86_64 / x86-64
- //===--------------------------------------------------------------------===//
-
- // SSE features. Note: SSE and SSE2 are mandatory parts of X86-64.
- IREE_CPU_DATA0_X86_64_SSE3 = 1ull << 0,
- IREE_CPU_DATA0_X86_64_SSSE3 = 1ull << 1,
- IREE_CPU_DATA0_X86_64_SSE41 = 1ull << 2,
- IREE_CPU_DATA0_X86_64_SSE42 = 1ull << 3,
- IREE_CPU_DATA0_X86_64_SSE4A = 1ull << 4,
-
- // AVX features.
- IREE_CPU_DATA0_X86_64_AVX = 1ull << 10,
- IREE_CPU_DATA0_X86_64_FMA3 = 1ull << 11,
- IREE_CPU_DATA0_X86_64_FMA4 = 1ull << 12,
- IREE_CPU_DATA0_X86_64_XOP = 1ull << 13,
- IREE_CPU_DATA0_X86_64_F16C = 1ull << 14,
- IREE_CPU_DATA0_X86_64_AVX2 = 1ull << 15,
-
- // AVX-512 features.
- IREE_CPU_DATA0_X86_64_AVX512F = 1ull << 20,
- IREE_CPU_DATA0_X86_64_AVX512CD = 1ull << 21,
- IREE_CPU_DATA0_X86_64_AVX512VL = 1ull << 22,
- IREE_CPU_DATA0_X86_64_AVX512DQ = 1ull << 23,
- IREE_CPU_DATA0_X86_64_AVX512BW = 1ull << 24,
- IREE_CPU_DATA0_X86_64_AVX512IFMA = 1ull << 25,
- IREE_CPU_DATA0_X86_64_AVX512VBMI = 1ull << 26,
- IREE_CPU_DATA0_X86_64_AVX512VPOPCNTDQ = 1ull << 27,
- IREE_CPU_DATA0_X86_64_AVX512VNNI = 1ull << 28,
- IREE_CPU_DATA0_X86_64_AVX512VBMI2 = 1ull << 29,
- IREE_CPU_DATA0_X86_64_AVX512BITALG = 1ull << 30,
- IREE_CPU_DATA0_X86_64_AVX512BF16 = 1ull << 31,
- IREE_CPU_DATA0_X86_64_AVX512FP16 = 1ull << 32,
-
- // AMX features.
- IREE_CPU_DATA0_X86_64_AMXTILE = 1ull << 50,
- IREE_CPU_DATA0_X86_64_AMXINT8 = 1ull << 51,
- IREE_CPU_DATA0_X86_64_AMXBF16 = 1ull << 52,
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+ IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name) = 1ull << bit_pos,
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
};
diff --git a/runtime/src/iree/schemas/cpu_feature_bits.inl b/runtime/src/iree/schemas/cpu_feature_bits.inl
new file mode 100644
index 0000000..93ef0a9
--- /dev/null
+++ b/runtime/src/iree/schemas/cpu_feature_bits.inl
@@ -0,0 +1,87 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+// CPU features: IREE cpu_data bits and mapping to LLVM target attribute keys.
+//===----------------------------------------------------------------------===//
+//
+// Refer to the file comment in cpu_data.h. Summary:
+// - This is included in both compiler and runtime.
+// - Unconditionally define CPU features for all target architectures, not just
+// host, because this is needed by the compiler when targeting non-host.
+// - The bit values will soon be set in stone, because they will be encoded in
+// generated modules.
+// - Try to pack related features in the same cpu_data field and in nearby bits
+// if possible, on a best-effort basis.
+
+#ifndef IREE_CPU_FEATURE_BIT
+#error Define IREE_CPU_FEATURE_BIT before including this file.
+#endif
+
+// Format:
+// IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, "llvm_name")
+//
+// Where:
+// - `arch` is the CPU architecture that this CPU feature applies to, in
+// IREE's uppercase convention (e.g. ARM_64, X86_64; see IREE_ARCH_*).
+// - `field_index` is the index into the array returned by `iree_cpu_data_fields()`.
+// Allowed values range from 0 to (IREE_CPU_DATA_FIELD_COUNT-1).
+// - `bit_pos` is the position of the feature bit within that cpu data field.
+// As these fields are uint64_t, the range of `bit_pos` is 0..63.
+// - `bit_name` is the suffix to use to form the IREE C identifier for this
+// feature's bit value.
+// - `llvm_name` is the string name of the corresponding LLVM target attribute
+// (without a leading +).
+
+//===----------------------------------------------------------------------===//
+// IREE_ARCH_ARM_64 / aarch64
+//===----------------------------------------------------------------------===//
+
+// TODO: add several common ARM ISA extensions and allocate some ranges of
+// bits for some families/eras. If we just start out with bits 0 and 1
+// allocated for dotprod and i8mm, we are quickly going to have a hard-to-read
+// enumeration here.
+IREE_CPU_FEATURE_BIT(ARM_64, 0, 0, DOTPROD, "dotprod")
+IREE_CPU_FEATURE_BIT(ARM_64, 0, 1, I8MM, "i8mm")
+
+//===----------------------------------------------------------------------===//
+// IREE_ARCH_X86_64 / x86-64
+//===----------------------------------------------------------------------===//
+
+// SSE features. Note: SSE and SSE2 are mandatory parts of X86-64.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 0, SSE3, "sse3")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 1, SSSE3, "ssse3")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 2, SSE41, "sse4.1")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 3, SSE42, "sse4.2")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 4, SSE4A, "sse4a")
+
+// AVX features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 10, AVX, "avx")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 11, FMA3, "fma")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 12, FMA4, "fma4")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 13, XOP, "xop")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 14, F16C, "f16c")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 15, AVX2, "avx2")
+
+// AVX-512 features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 20, AVX512F, "avx512f")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 21, AVX512CD, "avx512cd")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 22, AVX512VL, "avx512vl")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 23, AVX512DQ, "avx512dq")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 24, AVX512BW, "avx512bw")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 25, AVX512IFMA, "avx512ifma")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 26, AVX512VBMI, "avx512vbmi")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 27, AVX512VPOPCNTDQ, "avx512vpopcntdq")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 28, AVX512VNNI, "avx512vnni")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 29, AVX512VBMI2, "avx512vbmi2")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 30, AVX512BITALG, "avx512bitalg")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 31, AVX512BF16, "avx512bf16")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 32, AVX512FP16, "avx512fp16")
+
+// AMX features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 50, AMXTILE, "amx-tile")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 51, AMXINT8, "amx-int8")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 52, AMXBF16, "amx-bf16")
diff --git a/tools/BUILD b/tools/BUILD
index a6bdb90..c21ee68 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -211,6 +211,16 @@
)
cc_binary(
+ name = "iree-cpuinfo",
+ srcs = ["iree-cpuinfo.c"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:cpu",
+ "//runtime/src/iree/schemas:cpu_data",
+ ],
+)
+
+cc_binary(
name = "iree-tblgen",
srcs = [
"//compiler/src/iree/compiler/Dialect/VM/Tools:GenSrcs",
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 7d37b6c..aac88a3 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -186,6 +186,17 @@
yaml
)
+iree_cc_binary(
+ NAME
+ iree-cpuinfo
+ SRCS
+ "iree-cpuinfo.c"
+ DEPS
+ iree::base
+ iree::base::internal::cpu
+ iree::schemas::cpu_data
+)
+
if(IREE_BUILD_COMPILER)
# If a target backend that requires LLD to link codegen executables is
# enabled, install the target.
diff --git a/tools/iree-cpuinfo.c b/tools/iree-cpuinfo.c
new file mode 100644
index 0000000..d740465
--- /dev/null
+++ b/tools/iree-cpuinfo.c
@@ -0,0 +1,33 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/cpu.h"
+
+// Prints one line per CPU feature known for the architecture this binary was
+// built for: the LLVM feature name followed by the value reported by
+// iree_cpu_lookup_data_by_key (1 if the feature bit is set, 0 otherwise).
+int main(int argc, char *argv[]) {
+  iree_cpu_initialize(iree_allocator_system());
+
+  // The .inl file expands IREE_CPU_FEATURE_BIT once per listed feature, for
+  // every architecture; entries for other architectures are filtered out by
+  // the IREE_ARCH_ENUM comparison below.
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+  if (IREE_ARCH_ENUM == IREE_ARCH_ENUM_##arch) {                              \
+    int64_t result = 0;                                                       \
+    IREE_CHECK_OK(iree_cpu_lookup_data_by_key(IREE_SV(llvm_name), &result));  \
+    /* int64_t is not `long` on every platform; PRId64 is always right. */    \
+    printf("%-20s %" PRId64 "\n", llvm_name, result);                         \
+  }
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
+
+  return 0;
+}