blob: 897372cac1bd0b93517a596111df7533bfe48ef6 [file] [log] [blame]
// Copyright 2022 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/tooling/cpu_features.h"
#include "iree/base/target_platform.h"
#if defined(IREE_PLATFORM_LINUX) && defined(IREE_ARCH_ARM_64)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#endif
#if defined(IREE_ARCH_ARM_64)
// Identifies the aarch64 cpu feature registers that this file knows how to
// read. The enumerators are used as array indices into the per-register arrays
// of iree_cpu_features_t, so they must stay dense and start at zero.
typedef enum {
  iree_cpu_features_aarch64_register_ID_AA64ISAR0_EL1 = 0,
  iree_cpu_features_aarch64_register_ID_AA64ISAR1_EL1,
  // Not a register: the number of register enumerators above. Keep it last.
  iree_cpu_features_aarch64_register_count
} iree_cpu_features_aarch64_register_t;
// Cache of aarch64 cpu feature register values. Both arrays are indexed by
// iree_cpu_features_aarch64_register_t.
struct iree_cpu_features_t {
  // Raw register values. Entry i is filled in the first time register i is
  // requested and register access is available.
  uint64_t registers[iree_cpu_features_aarch64_register_count];
  // True once the matching entry of `registers` has been resolved, whether or
  // not the register could actually be read.
  bool registers_initialized[iree_cpu_features_aarch64_register_count];
};
// Returns true if the aarch64 cpu feature registers are accessible from this
// process, false otherwise. Always returns false when not building for Linux
// or when the system headers do not define HWCAP_CPUID.
static bool iree_devices_features_aarch64_register_access(void) {
#if defined(IREE_PLATFORM_LINUX) && defined(HWCAP_CPUID)
  // We are going to access some special registers that are not normally
  // accessible from userspace. That is only going to work thanks to
  // https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html
  // so we must first check if this feature is supported.
  //
  // Why not use this getauxval method to directly query the CPU features
  // that we care about, e.g. HWCAP2_I8MM? Because for each new CPU feature
  // that comes out, Linux needs to be updated to expose it in this way, and
  // sometimes we don't want to wait for a device to receive an updated
  // Linux kernel before we can use the new feature. By only relying
  // on HWCAP_CPUID, we freeze once and for all our Linux kernel version
  // requirement to one that's already widespread.
  //
  // For the same reason, we're content to use the system-headers-provided
  // HWCAP_CPUID and #if-out this block if it's not defined, because if it
  // were not defined, at this point, that would mean we're compiling against
  // aging Linux headers so maybe we're not interested in recent CPU
  // features. But feel free to change that if you need to.
  //
  // Why query this every time and not try to cache this in `features`?
  // Because getauxval is really cheap. It's just a memory read (auxv), no IO.
  // By contrast, what's expensive is the `mrs` instruction reading the actual
  // registers below, because that triggers a (would-be SIGILL) exception that
  // the kernel has to trap.
  //
  // The masked bit is compared against zero explicitly so the conversion to
  // bool does not depend on implicit narrowing of the unsigned long result.
  return (getauxval(AT_HWCAP) & HWCAP_CPUID) != 0;
#else
  // No known way to check on this platform: report the registers as
  // inaccessible so that callers never attempt to read them.
  return false;
#endif
}
// Defines a static helper function, iree_cpu_features_aarch64_read_<REG>(),
// that returns the current value of the aarch64 system register REG as a
// uint64_t.
//
// The generated function executes a single `mrs` instruction. Its destination
// is a compiler-chosen general-purpose register (the "=r" output operand) bound
// to the local `retval`; there are no input operands and no clobbers.
//
// REG is pasted into the function name with ## and stringized with # into the
// instruction text, so it must be a bare register name such as
// ID_AA64ISAR0_EL1.
//
// Callers are expected to check iree_devices_features_aarch64_register_access()
// before invoking a generated function, as
// iree_cpu_features_aarch64_get_register does.
// clang-format off
#define IREE_CPU_FEATURES_AARCH64_READ_REGISTER(REG) \
static uint64_t iree_cpu_features_aarch64_read_##REG(void) { \
uint64_t retval = 0; \
asm("mrs %[dst], " #REG \
: /* outputs */ \
[dst] "=r"(retval) \
: /* inputs */ \
: /* clobbers */); \
return retval; \
}
// clang-format on
// Define the access functions for the cpu feature registers that we need:
//   iree_cpu_features_aarch64_read_ID_AA64ISAR0_EL1()
//   iree_cpu_features_aarch64_read_ID_AA64ISAR1_EL1()
// Each register listed here needs a matching enumerator in
// iree_cpu_features_aarch64_register_t and a matching case in
// iree_cpu_features_aarch64_read_register.
IREE_CPU_FEATURES_AARCH64_READ_REGISTER(ID_AA64ISAR0_EL1)
IREE_CPU_FEATURES_AARCH64_READ_REGISTER(ID_AA64ISAR1_EL1)
// Helper for iree_cpu_features_aarch64_get_register:
// Performs an immediate, uncached read of the cpu feature register selected by
// |idx| and returns its value. Expensive!
//
// An |idx| that does not name a register trips the assert below (in builds
// where assert is enabled) and otherwise yields 0.
static uint64_t iree_cpu_features_aarch64_read_register(
    iree_cpu_features_aarch64_register_t idx) {
  uint64_t value = 0;
  bool recognized = false;
  switch (idx) {
    case iree_cpu_features_aarch64_register_ID_AA64ISAR0_EL1:
      value = iree_cpu_features_aarch64_read_ID_AA64ISAR0_EL1();
      recognized = true;
      break;
    case iree_cpu_features_aarch64_register_ID_AA64ISAR1_EL1:
      value = iree_cpu_features_aarch64_read_ID_AA64ISAR1_EL1();
      recognized = true;
      break;
    case iree_cpu_features_aarch64_register_count:
      // Spelled out as a case instead of using `default` so that the compiler
      // can warn if a newly added enum value is not handled here.
      break;
  }
  if (!recognized) {
    assert(false && "bad iree_cpu_features_aarch64_register_t value");
  }
  return value;
}
// Returns the value of the cpu feature register |idx|, reading it at most once
// per |features| instance and serving later requests from the cache stored in
// the iree_cpu_features_t struct.
//
// When the registers are not accessible, the cached slot is not written and
// keeps whatever value it already held (expected to be its initial 0 bits);
// the slot is still marked as resolved so the access check is not repeated.
static uint64_t iree_cpu_features_aarch64_get_register(
    iree_cpu_features_t* features, iree_cpu_features_aarch64_register_t idx) {
  uint64_t* const slot = &features->registers[idx];
  bool* const resolved = &features->registers_initialized[idx];

  // Fast path: already resolved on an earlier call.
  if (*resolved) {
    return *slot;
  }

  if (iree_devices_features_aarch64_register_access()) {
    *slot = iree_cpu_features_aarch64_read_register(idx);
  }
  *resolved = true;
  return *slot;
}
// Returns true if +dotprod is supported.
//
// Decodes the DP field, bits [47:44] of ID_AA64ISAR0_EL1. Returns false when
// the register value is 0, which is also what the cache holds when the feature
// registers are not accessible.
static bool iree_cpu_features_aarch64_dotprod(iree_cpu_features_t* features) {
  const uint64_t isar0 = iree_cpu_features_aarch64_get_register(
      features, iree_cpu_features_aarch64_register_ID_AA64ISAR0_EL1);
  // DP is a 4-bit unsigned ID field, not a single flag bit: 0 means "not
  // implemented" and any non-zero value means the dot product instructions
  // are present. Testing the whole field (instead of only bit 44) keeps the
  // answer correct should a higher field value ever be defined.
  const unsigned dp_field = (unsigned)((isar0 >> 44) & 0xF);
  return dp_field != 0;
}
// Returns true if +i8mm is supported.
//
// Decodes the I8MM field, bits [55:52] of ID_AA64ISAR1_EL1. Returns false when
// the register value is 0, which is also what the cache holds when the feature
// registers are not accessible.
static bool iree_cpu_features_aarch64_i8mm(iree_cpu_features_t* features) {
  const uint64_t isar1 = iree_cpu_features_aarch64_get_register(
      features, iree_cpu_features_aarch64_register_ID_AA64ISAR1_EL1);
  // I8MM is a 4-bit unsigned ID field, not a single flag bit: 0 means "not
  // implemented" and any non-zero value means the int8 matrix multiply
  // instructions are present. Testing the whole field (instead of only bit 52)
  // keeps the answer correct should a higher field value ever be defined.
  const unsigned i8mm_field = (unsigned)((isar1 >> 52) & 0xF);
  return i8mm_field != 0;
}
// NOTE(review): this redeclares the typedef name iree_cpu_features_t as itself.
// The unqualified name is already used in function signatures earlier in this
// file, so it is presumably declared by an included header and this line adds
// nothing. C11 permits repeating an identical typedef; earlier C standards do
// not. Consider removing it after confirming against cpu_features.h.
typedef iree_cpu_features_t iree_cpu_features_t;
#else // not defined(IREE_ARCH_ARM_64)
// Not-implemented case: no cpu features are tracked on this architecture.
// Decided in PR #8316 to make the struct non-empty just to avoid edge cases
// with empty structs.
struct iree_cpu_features_t {
  // Placeholder member; never read or written by this file.
  int unused;
};
#endif // defined(IREE_ARCH_ARM_64)
// Allocates storage for one iree_cpu_features_t from |allocator| and stores
// the resulting pointer in |*cpu_features|. Returns the status produced by
// iree_allocator_malloc.
//
// NOTE(review): the lazy register cache on aarch64 relies on the new struct
// starting out as all-zero bits; presumably iree_allocator_malloc returns
// zeroed memory. Confirm against the allocator documentation.
iree_status_t iree_cpu_features_allocate(iree_allocator_t allocator,
                                         iree_cpu_features_t** cpu_features) {
  void** const out_storage = (void**)cpu_features;
  return iree_allocator_malloc(allocator, sizeof(**cpu_features), out_storage);
}
// Returns the storage of |cpu_features| to |allocator| by forwarding to
// iree_allocator_free. Presumably |allocator| must be the one that was passed
// to iree_cpu_features_allocate; verify against the allocator contract.
void iree_cpu_features_free(iree_allocator_t allocator,
                            iree_cpu_features_t* cpu_features) {
  iree_allocator_free(allocator, cpu_features);
}
// Queries whether the CPU feature named |feature| is supported and stores the
// answer in |*out_result|.
//
// |*out_result| is reset to false before anything else happens. When built for
// IREE_ARCH_ARM_64 the names "+dotprod" and "+i8mm" are recognized; for those
// the detection result is stored and an OK status is returned. Any other name,
// and every name on other architectures, produces an
// IREE_STATUS_INVALID_ARGUMENT status and leaves |*out_result| false.
iree_status_t iree_cpu_features_query(iree_cpu_features_t* cpu_features,
                                      iree_string_view_t feature,
                                      bool* out_result) {
  *out_result = false;
#ifdef IREE_ARCH_ARM_64
  const iree_string_view_t dotprod_name = iree_make_cstring_view("+dotprod");
  const iree_string_view_t i8mm_name = iree_make_cstring_view("+i8mm");
  if (iree_string_view_equal(feature, dotprod_name)) {
    *out_result = iree_cpu_features_aarch64_dotprod(cpu_features);
    return iree_ok_status();
  } else if (iree_string_view_equal(feature, i8mm_name)) {
    *out_result = iree_cpu_features_aarch64_i8mm(cpu_features);
    return iree_ok_status();
  }
#endif  // IREE_ARCH_ARM_64
  // Not a feature name known for this architecture.
  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                          "unhandled CPU feature: '%.*s'", (int)feature.size,
                          feature.data);
}