Fixes and simplifications to CPU-data handling (#13881)
In DispatchABI.cpp:
* Was not checking if CPU feature bits in the schema applied to the
target architecture.
* Was not checking if CPU feature bits belonged to field 0 even though
the current code only handles that. Other fields are currently unused
but at least now we will get an assert failure if that changes.
* Was building a vector of bits, then creating one `LLVM::OrOp`for each
bit. Instead, why not OR those bits in C++ and create a single
`LLVM::OrOp` with the resulting OR-ed value?
* Was not checking that the first character of each feature string was a
`'+'` before dropping it.
* Was room for simplification in how the schema was used.
Additionally, folding into this PR a simplification to `iree-cpuinfo` (a
standalone tool to report the host CPU's features exactly as detected by
IREE). It was quadratic-time, as it unnecessarily called
`iree_cpu_lookup_data_by_key` for each feature.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 25c2ffb..39709b2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -6,6 +6,7 @@
#include "iree/compiler/Codegen/LLVMCPU/DispatchABI.h"
+#include "iree/compiler/Codegen/Utils/Utils.h"
#include "iree/schemas/cpu_data.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/CommandLine.h"
@@ -24,39 +25,6 @@
namespace mlir {
namespace iree_compiler {
-// List of all defined llvm feature-name to bit pattern used to represent it.
-// This is derived based on the schema in `runtime/src/iree/schemas/`.
-// TODO(ravishankarm): This link to the runtime schemas needs to be broken.
-// Instead we should use a reflection callback to resolve arch guarded features
-// directly in the compiler.
-
-// Struct to capture tuple of llvm feature-name to bit pattern used to represent
-// it.
-struct iree_llvm_name_and_bit_pattern_t {
- const char *llvm_name;
- unsigned long long bit_pattern;
-};
-
-#define IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name) \
- IREE_CPU_DATA##field_index##_##arch##_##bit_name
-
-#define IREE_CPU_FEATURE_NAME_AND_BIT_PATTERN(arch, field_index, bit_name, \
- llvm_name) \
- {llvm_name, IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name)},
-
-static const struct iree_llvm_name_and_bit_pattern_t
- iree_llvm_name_and_bit_pattern_list[] = {
-
-#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
- IREE_CPU_FEATURE_NAME_AND_BIT_PATTERN(arch, field_index, bit_name, llvm_name)
-#include "iree/schemas/cpu_feature_bits.inl"
-#undef IREE_CPU_FEATURE_BIT
-
-};
-
-#undef IREE_CPU_FEATURE_NAME_AND_BIT_PATTERN
-#undef IREE_CPU_FEATURE_BIT_NAME
-
//------------------------------------------------------------------------------
// ExecutableLibraryDI
//------------------------------------------------------------------------------
@@ -885,24 +853,47 @@
return processorDataPtrValue;
}
- SmallVector<uint64_t> specifiedFeatureBitPatterns;
+ // Currently requiring all CPU feature bits to be in field 0. Generalize as
+ // needed when other CPU feature fields start to be used.
+ uint64_t specifiedCpuDataField0 = 0;
{
+ // Map llvm feature-name to bit used to represent it in IREE_CPUDATA_FIELD0.
+ //
+ // TODO(ravishankarm): This link to the runtime schemas needs to be broken.
+ // Instead we should use a reflection callback to resolve arch guarded
+ // features directly in the compiler.
llvm::StringMap<uint64_t> featureToBitPattern;
- for (auto [llvmName, bitPattern] : iree_llvm_name_and_bit_pattern_list) {
- featureToBitPattern[llvmName] = bitPattern;
+ auto targetTriple = getTargetTriple(targetAttr);
+ if (!targetTriple) {
+ return processorDataPtrValue;
}
+ std::string targetArchUppercase =
+ StringRef(getIreeArchNameForTargetTriple(targetTriple.value())).upper();
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+ if (targetArchUppercase == #arch) { \
+ assert(field_index == 0); \
+ featureToBitPattern[llvm_name] = 1ull << bit_pos; \
+ }
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
+
+ // Find CPU features in featureToBitPattern
SmallVector<StringRef> cpuFeatureStrings;
llvm::cast<StringAttr>(cpuFeatures->getValue())
.getValue()
.split(cpuFeatureStrings, ',', /*MakeSplit=*/-1, /*KeepEmpty=*/false);
for (auto featureString : cpuFeatureStrings) {
- if (featureToBitPattern.count(featureString.drop_front())) {
- specifiedFeatureBitPatterns.push_back(
- featureToBitPattern.lookup(featureString.drop_front()));
+ // CPU features are typically prefixed with a +, e.g. +avx,+avx2,+fma.
+ featureString.consume_front("+");
+ // Silently skip unknown CPU features, more flexible for now. Note that
+ // some featurs occurring here are not standard CPU features but internal
+ // things such as the "+reserve-x18" that we add on arm64.
+ if (featureToBitPattern.count(featureString)) {
+ specifiedCpuDataField0 |= featureToBitPattern.lookup(featureString);
}
}
}
- if (specifiedFeatureBitPatterns.empty()) {
+ if (specifiedCpuDataField0 == 0) {
return processorDataPtrValue;
}
@@ -919,11 +910,9 @@
Value srcData0 =
builder.create<LLVM::LoadOp>(loc, i64Ty, processorDataPtrValue);
// Set the specified CPU arch data.
- for (auto bitPattern : specifiedFeatureBitPatterns) {
- Value bitPatternVal = builder.create<LLVM::ConstantOp>(
- loc, i64Ty, builder.getI64IntegerAttr(bitPattern));
- srcData0 = builder.create<LLVM::OrOp>(loc, srcData0, bitPatternVal);
- }
+ Value bitPatternVal = builder.create<LLVM::ConstantOp>(
+ loc, i64Ty, builder.getI64IntegerAttr(specifiedCpuDataField0));
+ srcData0 = builder.create<LLVM::OrOp>(loc, srcData0, bitPatternVal);
builder.create<LLVM::StoreOp>(loc, srcData0, alloca);
// Copy over the rest.
for (int64_t i = 1, e = ProcessorDataCapacity; i < e; ++i) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
index 7acae65..3a3cb25 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
@@ -102,13 +102,7 @@
}
// CHECK: llvm.func @paramstruct_cconv_with_extra_fields_and_executable_target(!llvm.ptr)
// CHECK: llvm.func @bar
-// CHECK-DAG: %[[F16C:.+]] = llvm.mlir.constant(16384 : i64) : i64
-// CHECK-DAG: %[[FMA:.+]] = llvm.mlir.constant(2048 : i64) : i64
-// CHECK-DAG: %[[AVX2:.+]] = llvm.mlir.constant(32768 : i64) : i64
-// CHECK-DAG: %[[AVX:.+]] = llvm.mlir.constant(1024 : i64) : i64
-// CHECK-DAG: %[[SSE41:.+]] = llvm.mlir.constant(4 : i64) : i64
-// CHECK-DAG: %[[SSSE3:.+]] = llvm.mlir.constant(2 : i64) : i64
-// CHECK-DAG: %[[SSE3:.+]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK-DAG: %[[CPUDATA_FIELD0:.+]] = llvm.mlir.constant(52239 : i64) : i64
// CHECK-DAG: %[[C8:.+]] = llvm.mlir.constant(8 : i64) : i64
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : index) : i64
// CHECK-DAG: %[[Ci32:.+]] = llvm.mlir.constant(42 : i32) : i32
@@ -117,15 +111,8 @@
// CHECK-DAG: %[[DATA_PTR:.+]] = llvm.getelementptr inbounds %arg0[4]
// CHECK: %[[PROCESSOR_DATA_ALLOCA:.+]] = llvm.alloca %[[C8]] x i64 {alignment = 8 : i64}
// CHECK-DAG: %[[DATA:.+]] = llvm.load %[[DATA_PTR]]
-// CHECK-DAG: %[[OR0:.+]] = llvm.or %[[DATA]], %[[SSE3]]
-// CHECK-DAG: %[[OR1:.+]] = llvm.or %[[OR0]], %[[SSSE3]]
-// CHECK-DAG: %[[OR2:.+]] = llvm.or %[[OR1]], %[[SSE41]]
-// CHECK-DAG: %[[OR3:.+]] = llvm.or %[[OR2]], %[[C8]]
-// CHECK-DAG: %[[OR4:.+]] = llvm.or %[[OR3]], %[[AVX]]
-// CHECK-DAG: %[[OR5:.+]] = llvm.or %[[OR4]], %[[AVX2]]
-// CHECK-DAG: %[[OR6:.+]] = llvm.or %[[OR5]], %[[FMA]]
-// CHECK-DAG: %[[OR7:.+]] = llvm.or %[[OR6]], %[[F16C]]
-// CHECK: llvm.store %[[OR7]], %[[PROCESSOR_DATA_ALLOCA]]
+// CHECK-DAG: %[[OR0:.+]] = llvm.or %[[DATA]], %[[CPUDATA_FIELD0]]
+// CHECK: llvm.store %[[OR0]], %[[PROCESSOR_DATA_ALLOCA]]
// CHECK: %[[PROCESSOR_DATA_PTR_1:.+]] = llvm.getelementptr inbounds %[[DATA_PTR]][1]
// CHECK: %[[PROCESSOR_DATA_1:.+]] = llvm.load %[[PROCESSOR_DATA_PTR_1]]
diff --git a/tools/iree-cpuinfo.c b/tools/iree-cpuinfo.c
index d740465..f5324d0 100644
--- a/tools/iree-cpuinfo.c
+++ b/tools/iree-cpuinfo.c
@@ -11,11 +11,11 @@
int main(int argc, char *argv[]) {
iree_cpu_initialize(iree_allocator_system());
+ const uint64_t* cpu_data = iree_cpu_data_fields();
#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
if (IREE_ARCH_ENUM == IREE_ARCH_ENUM_##arch) { \
- int64_t result = 0; \
- IREE_CHECK_OK(iree_cpu_lookup_data_by_key(IREE_SV(llvm_name), &result)); \
+ bool result = (cpu_data[field_index] & (1ull << bit_pos)) != 0; \
printf("%-20s %ld\n", llvm_name, result); \
}
#include "iree/schemas/cpu_feature_bits.inl"