// blob: a9da1c9b3f2658046a31df3bf897e9c1bce5b40b [file] [log] [blame]
// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h"
#include <mutex>
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include "mlir/IR/Builders.h"
namespace mlir::iree_compiler::IREE::HAL {
namespace {
// Turns the requested CPU name and CPU feature string into the concrete values
// to store on a target.
//
// Parameters:
//   inCpu, inCpuFeatures: requested values; either may be the literal "host".
//   triple: consulted for architecture-specific behavior.
//   outCpu, outCpuFeatures: receive the resolved values.
//
// Returns false after printing a message to llvm::errs() when:
//   * "host" is mixed with a value that is neither "host" nor a default (the
//     outputs are not written in this case), or
//   * a named CPU needs to be expanded to features on an architecture other
//     than x86 or 64-bit RISC-V (the outputs then hold the values resolved so
//     far, without the expansion).
bool resolveCPUAndCPUFeatures(llvm::StringRef inCpu,
                              llvm::StringRef inCpuFeatures,
                              const llvm::Triple &triple, std::string &outCpu,
                              std::string &outCpuFeatures) {
  const bool cpuIsHost = inCpu == "host";
  const bool featuresAreHost = inCpuFeatures == "host";

  if (!cpuIsHost && !featuresAreHost) {
    // Nothing to query from the host: pass the request through verbatim.
    outCpu = inCpu;
    outCpuFeatures = inCpuFeatures;
  } else {
    // When one side asks for "host" the other side has to be "host" as well
    // or left at its default.
    const bool cpuAllowsHost = cpuIsHost || inCpu.empty() || inCpu == "generic";
    const bool featuresAllowHost = featuresAreHost || inCpuFeatures.empty();
    if (!cpuAllowsHost || !featuresAllowHost) {
      llvm::errs()
          << "error: If either cpu or CpuFeatures is `host`, the other must "
             "be either also `host` or the default value\n";
      return false;
    }

    // The host CPU name is only used on x86; elsewhere it stays empty.
    if (triple.isX86()) {
      outCpu = llvm::sys::getHostCPUName().str();
    } else {
      outCpu = "";
    }

    llvm::SubtargetFeatures hostFeatures;
    for (const auto &entry : llvm::sys::getHostCPUFeatures()) {
      hostFeatures.AddFeature(entry.first(), entry.second);
    }
    outCpuFeatures = hostFeatures.getString();
  }

  // Feature tweaks applied regardless of what was requested.
  if (triple.isAArch64()) {
    // The AArch64 procedure call specification reserves x18 for the platform.
    llvm::SubtargetFeatures withReservedX18(outCpuFeatures);
    withReservedX18.AddFeature("reserve-x18", true);
    outCpuFeatures = withReservedX18.getString();
  }

  // Host, generic and empty CPUs do not get expanded into a feature list.
  const bool skipExpansion = outCpu.empty() || cpuIsHost ||
                             inCpu == "generic" ||
                             inCpu.starts_with("generic-");
  if (skipExpansion) {
    return true;
  }

  // A specific CPU was named: append the features LLVM associates with it.
  llvm::SubtargetFeatures expanded(outCpuFeatures);
  if (triple.isX86()) {
    llvm::SmallVector<llvm::StringRef> names;
    llvm::X86::getFeaturesForCPU(outCpu, names, false);
    for (const auto &name : names) {
      expanded.AddFeature(name);
    }
  } else if (triple.isRISCV64()) {
    llvm::SmallVector<std::string> names;
    llvm::RISCV::getFeaturesForCPU(outCpu, names, false);
    for (const auto &name : names) {
      expanded.AddFeature(name);
    }
  } else {
    llvm::errs()
        << "error: Resolution of target CPU to target CPU features is not "
           "implemented on "
           "this target architecture. Pass explicit CPU features "
           "instead of a CPU "
           "on this architecture, or implement that.\n";
    return false;
  }
  outCpuFeatures = expanded.getString();
  return true;
}
} // namespace
// Builds a target whose tuning and codegen settings are at their defaults.
LLVMTarget::LLVMTarget() {
  // Optimization levels for the LLVM optimizer and code generator.
  // TODO(benvanik): add an option for this.
  optimizerOptLevel = llvm::OptimizationLevel::O2;
  codeGenOptLevel = llvm::CodeGenOptLevel::Aggressive;

  // Loop optimizations and the SLP auto-vectorizer.
  auto &tuning = pipelineTuningOptions;
  tuning.LoopInterleaving = DEFAULT_LOOP_INTERLEAVING;
  tuning.LoopVectorization = DEFAULT_LOOP_VECTORIZATION;
  tuning.LoopUnrolling = DEFAULT_LOOP_UNROLLING;
  tuning.SLPVectorization = DEFAULT_SLP_VECTORIZATION;

  // Backend options. Function sections (as with `-ffunction-sections`) are
  // forced on so that unused code can be stripped.
  auto &backend = llvmTargetOptions;
  backend.FloatABIType = DEFAULT_FLOAT_ABI;
  backend.FunctionSections = true;
  backend.DataSections = true;
  backend.UniqueSectionNames = true;
}
std::optional<LLVMTarget> LLVMTarget::create(std::string_view triple,
std::string_view cpu,
std::string_view cpuFeatures,
bool requestLinkEmbedded) {
LLVMTarget target;
target.linkEmbedded = requestLinkEmbedded;
target.triple = triple;
llvm::Triple targetTriple(target.triple);
// Special casing if linkEmbedded.
if (targetTriple.isWasm()) {
// The embedded ELF loader is not supported on WebAssembly, so force it off.
target.linkEmbedded = false;
}
if (target.linkEmbedded) {
// Force the triple to something compatible with embedded linking.
targetTriple.setVendor(llvm::Triple::VendorType::UnknownVendor);
targetTriple.setEnvironment(llvm::Triple::EnvironmentType::EABI);
targetTriple.setOS(llvm::Triple::OSType::UnknownOS);
targetTriple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF);
target.triple = targetTriple.str();
}
if (!resolveCPUAndCPUFeatures(cpu, cpuFeatures, llvm::Triple(triple),
target.cpu, target.cpuFeatures)) {
// Something bad happened, and our target might not be what the user expects
// but we need to continue to avoid breaking existing users. Hopefully
// resolveCPUAndCPUFeatures logged a helpful error already.
}
return target;
}
std::optional<LLVMTarget> LLVMTarget::createForHost() {
auto target =
LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host",
/*cpuFeatures=*/"host",
/*requestLinkEmbedded=*/true);
if (target)
target->populateDefaultsFromTargetMachine();
return target;
}
// Writes a multi-line, human-readable dump of this target's fields to `os`.
// Enum-valued fields (sanitizer, float ABI) are printed as their integer
// values.
void LLVMTarget::print(llvm::raw_ostream &os) const {
  const auto &tuning = pipelineTuningOptions;

  os << "LLVMTarget{\n";

  // Identity of the target.
  os << " triple=" << triple << ", cpu=" << cpu
     << ", cpuFeatures=" << cpuFeatures << "\n";
  os << " dataLayout=" << dataLayout << "\n";
  os << " vectorWidthInBytes=" << vectorWidthInBytes << "\n";

  // Linking / debugging configuration.
  os << " linkEmbedded=" << linkEmbedded << "\n";
  os << " debugSymbols=" << debugSymbols << "\n";
  os << " sanitizer=" << static_cast<int>(sanitizerKind) << "\n";
  os << " staticLibraryOutput=" << staticLibraryOutput << "\n";
  os << " linkStatic=" << linkStatic << "\n";

  // Optimizer tuning knobs.
  os << " pipelineTuningOptions={\n";
  os << " LoopInterleaving=" << tuning.LoopInterleaving << "\n";
  os << " LoopVectorization=" << tuning.LoopVectorization << "\n";
  os << " LoopUnrolling=" << tuning.LoopUnrolling << "\n";
  os << " SLPVectorization=" << tuning.SLPVectorization << "\n";

  // Backend options.
  os << " }, llvmTargetOptions={\n";
  os << " FloatABIType=" << static_cast<int>(llvmTargetOptions.FloatABIType)
     << "\n";
  os << " }\n";

  // Microkernel configuration.
  os << " ukernels=" << ukernels << "\n";
  os << " linkUkernelBitcode=" << linkUkernelBitcode << "\n";
  os << "}\n";
}
// Appends this target's configuration to `config` as named attributes created
// in `context`.
//
// The triple, CPU and CPU features are always written. All other entries are
// written only when they differ from their default (or, for string-valued
// paths and the ABI name, when they are non-empty).
void LLVMTarget::storeToConfigAttrs(MLIRContext *context,
                                    SmallVector<NamedAttribute> &config) const {
  Builder b(context);

  // Small emitters, one per attribute kind.
  auto putString = [&](StringRef key, StringRef text) {
    config.emplace_back(b.getStringAttr(key), b.getStringAttr(text));
  };
  auto putBool = [&](StringRef key, bool flag) {
    config.emplace_back(b.getStringAttr(key), b.getBoolAttr(flag));
  };
  auto putInt64 = [&](StringRef key, int64_t number) {
    config.emplace_back(b.getStringAttr(key), b.getI64IntegerAttr(number));
  };

  // Always-present identity of the target.
  putString("target_triple", triple);
  putString("cpu", cpu);
  putString("cpu_features", cpuFeatures);

  if (!dataLayout.empty()) {
    putString("data_layout", dataLayout);
  }
  if (vectorWidthInBytes != DEFAULT_VECTOR_WIDTH_IN_BYTES) {
    putInt64("native_vector_size", vectorWidthInBytes);
  }
  if (linkEmbedded != DEFAULT_LINK_EMBEDDED) {
    putBool("link_embedded", linkEmbedded);
  }
  if (debugSymbols != DEFAULT_DEBUG_SYMBOLS) {
    putBool("debug_symbols", debugSymbols);
  }
  if (linkStatic != DEFAULT_LINK_STATIC) {
    putBool("link_static", linkStatic);
  }

  if (sanitizerKind != DEFAULT_SANITIZER_KIND) {
    // Map the enum to its textual spelling, then emit it.
    StringRef sanitizerName;
    switch (sanitizerKind) {
    case SanitizerKind::kNone:
      sanitizerName = "none";
      break;
    case SanitizerKind::kAddress:
      sanitizerName = "address";
      break;
    case SanitizerKind::kThread:
      sanitizerName = "thread";
      break;
    }
    if (!sanitizerName.empty()) {
      putString("sanitizer", sanitizerName);
    }
  }

  if (!staticLibraryOutput.empty()) {
    putString("static_library_output", staticLibraryOutput);
  }

  // Optimizer tuning knobs.
  const auto &tuning = pipelineTuningOptions;
  if (tuning.LoopInterleaving != DEFAULT_LOOP_INTERLEAVING) {
    putBool("loop_interleaving", tuning.LoopInterleaving);
  }
  if (tuning.LoopVectorization != DEFAULT_LOOP_VECTORIZATION) {
    putBool("loop_vectorization", tuning.LoopVectorization);
  }
  if (tuning.LoopUnrolling != DEFAULT_LOOP_UNROLLING) {
    putBool("loop_unrolling", tuning.LoopUnrolling);
  }
  if (tuning.SLPVectorization != DEFAULT_SLP_VECTORIZATION) {
    putBool("slp_vectorization", tuning.SLPVectorization);
  }

  // ABI selection.
  if (!llvmTargetOptions.MCOptions.ABIName.empty()) {
    putString("target_abi", llvmTargetOptions.MCOptions.ABIName);
  }
  if (llvmTargetOptions.FloatABIType != DEFAULT_FLOAT_ABI) {
    StringRef floatAbiName;
    switch (llvmTargetOptions.FloatABIType) {
    case llvm::FloatABI::Default:
      floatAbiName = "default";
      break;
    case llvm::FloatABI::Soft:
      floatAbiName = "soft";
      break;
    case llvm::FloatABI::Hard:
      floatAbiName = "hard";
      break;
    }
    if (!floatAbiName.empty()) {
      putString("float_abi", floatAbiName);
    }
  }

  // Microkernel configuration.
  if (ukernels.compare(DEFAULT_ENABLE_UKERNELS) != 0) {
    putString("ukernels", ukernels);
  }
  if (linkUkernelBitcode != DEFAULT_LINK_UKERNEL_BITCODE) {
    putBool("link_ukernel_bitcode", linkUkernelBitcode);
  }
}
// Reconstructs an LLVMTarget from the attributes in `config`.
//
// Parameters:
//   loc: location used for any diagnostics that are emitted.
//   config: dictionary holding the keys written by storeToConfigAttrs.
//   defaultTarget: copied as the starting point when `config` has none of
//     'target_triple', 'cpu' or 'cpu_features'.
//
// Returns the loaded target with target-machine defaults populated, or an
// empty optional after emitting an error when:
//   * 'cpu' / 'cpu_features' are given without 'target_triple',
//   * an attribute has the wrong type, or
//   * 'sanitizer' / 'float_abi' hold an unrecognized value.
std::optional<LLVMTarget>
LLVMTarget::loadFromConfigAttr(Location loc, DictionaryAttr config,
                               const LLVMTarget &defaultTarget) {
  // Type mismatches are recorded here and reported collectively at the end so
  // that every offending attribute gets its own diagnostic.
  bool hasFailures = false;

  // Returns the string attribute `name`, or `fallback` when it is absent or
  // not a string. Only emits an error when `required` is set.
  auto getString = [&](StringRef name, StringRef fallback,
                       bool required) -> StringRef {
    Attribute attr = config.get(name);
    if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) {
      return sattr.strref();
    } else {
      if (required) {
        hasFailures = true;
        emitError(loc) << "executable config '" << name
                       << "' required but not present on attribute";
      }
      return fallback;
    }
  };
  // Returns the string attribute `name` if present; a present attribute of
  // another type is an error.
  auto getOptionalString = [&](StringRef name) -> std::optional<StringRef> {
    Attribute attr = config.get(name);
    if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) {
      return sattr.strref();
    } else if (attr) {
      hasFailures = true;
      emitError(loc) << "executable config '" << name
                     << "' requires string but got " << attr;
    }
    return {};
  };
  // Returns the bool attribute `name`, or `fallback` when absent; a present
  // attribute of another type is an error.
  auto getBool = [&](StringRef name, bool fallback) -> bool {
    Attribute attr = config.get(name);
    if (auto battr = llvm::dyn_cast_if_present<BoolAttr>(attr)) {
      return battr.getValue();
    } else if (attr) {
      hasFailures = true;
      emitError(loc) << "executable config '" << name
                     << "' requires bool but got " << attr;
    }
    return fallback;
  };
  // Returns the integer attribute `name` sign-extended to 64 bits, or
  // `fallback` when absent; a present attribute of another type is an error.
  auto getInt64 = [&](StringRef name, int64_t fallback) -> int64_t {
    Attribute attr = config.get(name);
    if (auto iattr = llvm::dyn_cast_if_present<IntegerAttr>(attr)) {
      return iattr.getValue().getSExtValue();
    } else if (attr) {
      hasFailures = true;
      emitError(loc) << "executable config '" << name
                     << "' requires i64 but got " << attr;
    }
    return fallback;
  };

  LLVMTarget target;

  // Constructor arguments.
  auto triple = getOptionalString("target_triple");
  auto cpu = getOptionalString("cpu");
  auto cpuFeatures = getOptionalString("cpu_features");
  bool linkEmbedded = getBool("link_embedded", DEFAULT_LINK_EMBEDDED);
  if (triple || cpu || cpuFeatures) {
    if (!triple) {
      emitError(loc) << "executable config 'cpu' or 'cpu_features' must be "
                        "accompanied by 'target_triple'";
      return {};
    }
    std::optional<LLVMTarget> maybeTarget =
        LLVMTarget::create(*triple, cpu ? *cpu : "generic",
                           cpuFeatures ? *cpuFeatures : "", linkEmbedded);
    if (!maybeTarget) {
      return {};
    }
    target.copy(*maybeTarget);
  } else {
    target.copy(defaultTarget);
  }

  // Loose items.
  target.dataLayout = getString("data_layout", DEFAULT_DATA_LAYOUT, false);
  target.vectorWidthInBytes =
      getInt64("native_vector_size", DEFAULT_VECTOR_WIDTH_IN_BYTES);
  target.debugSymbols = getBool("debug_symbols", DEFAULT_DEBUG_SYMBOLS);
  target.linkStatic = getBool("link_static", DEFAULT_LINK_STATIC);

  auto sanitizer = getOptionalString("sanitizer");
  if (sanitizer) {
    if (sanitizer == "none")
      target.sanitizerKind = SanitizerKind::kNone;
    else if (sanitizer == "address")
      target.sanitizerKind = SanitizerKind::kAddress;
    else if (sanitizer == "thread")
      target.sanitizerKind = SanitizerKind::kThread;
    else {
      emitError(loc) << "executable config unexpected value for 'sanitizer': "
                     << *sanitizer;
      return {};
    }
  }

  target.staticLibraryOutput = getString("static_library_output", "", false);

  // Tuning knobs fall back to whatever the base target already carries.
  target.pipelineTuningOptions.LoopInterleaving = getBool(
      "loop_interleaving", target.pipelineTuningOptions.LoopInterleaving);
  target.pipelineTuningOptions.LoopVectorization = getBool(
      "loop_vectorization", target.pipelineTuningOptions.LoopVectorization);
  target.pipelineTuningOptions.LoopUnrolling =
      getBool("loop_unrolling", target.pipelineTuningOptions.LoopUnrolling);
  target.pipelineTuningOptions.SLPVectorization = getBool(
      "slp_vectorization", target.pipelineTuningOptions.SLPVectorization);

  auto targetAbi = getOptionalString("target_abi");
  if (targetAbi)
    target.llvmTargetOptions.MCOptions.ABIName = *targetAbi;

  // Each spelling maps to its own enumerator, mirroring the strings that
  // storeToConfigAttrs writes, so a stored "soft"/"hard" round-trips.
  auto floatAbi = getOptionalString("float_abi");
  if (floatAbi) {
    if (floatAbi == "default")
      target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Default;
    else if (floatAbi == "soft")
      target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Soft;
    else if (floatAbi == "hard")
      target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Hard;
    else {
      emitError(loc) << "executable config unexpected value for 'float_abi'";
      return {};
    }
  }

  target.ukernels = getString("ukernels", target.ukernels, false);
  target.linkUkernelBitcode =
      getBool("link_ukernel_bitcode", target.linkUkernelBitcode);

  if (hasFailures) {
    return {};
  }
  target.populateDefaultsFromTargetMachine();
  return target;
}
void LLVMTarget::populateDefaultsFromTargetMachine() {
// We may need the target machine for certain default values.
std::unique_ptr<llvm::TargetMachine> cachedTargetMachine;
auto getTargetMachine = [&]() {
if (!cachedTargetMachine) {
cachedTargetMachine = createTargetMachine(*this);
// TODO(#13988): proper error propagation. This is a common user scenario.
assert(cachedTargetMachine && "createTargetMachine failed");
}
return cachedTargetMachine.get();
};
if (dataLayout.empty()) {
auto targetDataLayout = getTargetMachine()->createDataLayout();
dataLayout = targetDataLayout.getStringRepresentation();
}
if (vectorWidthInBytes == DEFAULT_VECTOR_WIDTH_IN_BYTES) {
auto targetMachine = getTargetMachine();
auto targetFeatures = targetMachine->getTargetFeatureString();
// The only way to get the real TTI is to create a function using it.
// LLVM's TargetMachine and related APIs are terrible. Absolutely yuck.
// Note that we use the data layout set above to either what the user
// specified or what the target machine returned.
//
// If anyone comes across this: it'd be great if getTargetTransformInfo
// could be called without requiring a function.
llvm::LLVMContext llvmContext;
auto llvmModule =
std::make_unique<llvm::Module>("dummy_module", llvmContext);
llvmModule->setDataLayout(dataLayout);
llvm::Function *dummyFunc = llvm::Function::Create(
llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), false),
llvm::GlobalValue::ExternalLinkage, "dummy_func", *llvmModule);
if (targetFeatures.contains("avx512")) {
// Always override the vector with to 512 on systems with avx512.
// @dcaballe says:
// > in ML the frequency throttling that happens when using 512-bit
// > register doesn't have an overall negative impact in performance due
// > to the high computational density of the workloads, even on skylake
// > where the throttling was really bad
dummyFunc->addFnAttr("prefer-vector-width", "512");
}
auto targetTTI = targetMachine->getTargetTransformInfo(*dummyFunc);
// Query the vector width from TTI.
unsigned ttiVectorWidthInBytes =
targetTTI.getRegisterBitWidth(
llvm::TargetTransformInfo::RGK_FixedWidthVector) /
8;
vectorWidthInBytes = ttiVectorWidthInBytes > 1 ? ttiVectorWidthInBytes : 16;
}
}
// Creates an LLVM TargetMachine for `target` using its triple, CPU, CPU
// features, backend options and codegen optimization level, with the PIC
// relocation model and JIT disabled.
//
// Returns nullptr when the triple cannot be found in the target registry; the
// registry's error text is not reported.
std::unique_ptr<llvm::TargetMachine>
createTargetMachine(const LLVMTarget &target) {
  std::string lookupError;
  const auto *registeredTarget =
      llvm::TargetRegistry::lookupTarget(target.getTriple(), lookupError);
  if (registeredTarget == nullptr) {
    return nullptr;
  }
  return std::unique_ptr<llvm::TargetMachine>(
      registeredTarget->createTargetMachine(
          target.getTriple(), target.getCpu() /* cpu e.g k8 */,
          target.getCpuFeatures() /* cpu features e.g avx512f */,
          target.llvmTargetOptions, llvm::Reloc::Model::PIC_, {},
          target.codeGenOptLevel,
          /*JIT=*/false));
}
// Initializes the LLVM CPU targets this backend cares about (AArch64, ARM,
// RISCV, X86, WebAssembly), limited to those listed by the included
// llvm/Config/Targets.def. For each such target the Target, TargetMC,
// TargetInfo, AsmPrinter and AsmParser components are initialized.
//
// NOTE: the helper macros defined inside this function are not #undef'd
// within it.
static void initializeLLVMTargets() {
// Dynamically do preprocessor dispatch to initialize only targets that we
// care about if they are enabled. Unfortunately, the way the LLVM macros
// for this are set up and the inability to do a conditional within a macro
// means that we have to syntactically have a macro for every possible
// target we care about. There are more robust ways to do this but they all
// require build support, which is a pain to manage across platforms.
//
// The macro groups below are, in order: the generic per-target initializer,
// the targets we initialize, and the targets we deliberately no-op.
#define LLVM_INITIALIZE_GENERIC(TargetName) \
LLVMInitialize##TargetName##Target(); \
LLVMInitialize##TargetName##TargetMC(); \
LLVMInitialize##TargetName##TargetInfo(); \
LLVMInitialize##TargetName##AsmPrinter(); \
LLVMInitialize##TargetName##AsmParser();
// CPU targets that we care about and have hard-linked against are here.
// They delegate to the generic initialize above. These must all be added
// to the build file or you will get undefined symbol errors at link time.
#define LLVM_INITIALIZE_TARGET_AArch64() LLVM_INITIALIZE_GENERIC(AArch64)
#define LLVM_INITIALIZE_TARGET_ARM() LLVM_INITIALIZE_GENERIC(ARM)
#define LLVM_INITIALIZE_TARGET_RISCV() LLVM_INITIALIZE_GENERIC(RISCV)
#define LLVM_INITIALIZE_TARGET_X86() LLVM_INITIALIZE_GENERIC(X86)
#define LLVM_INITIALIZE_TARGET_WebAssembly() \
LLVM_INITIALIZE_GENERIC(WebAssembly)
// We must no-op the name of each target we don't care about. This is
// annoying, but new targets aren't created every day so it isn't the end of
// the world. The error messages when one is missing are quite clear and the
// fix is to add a line here.
#define LLVM_INITIALIZE_TARGET_AMDGPU()
#define LLVM_INITIALIZE_TARGET_AVR()
#define LLVM_INITIALIZE_TARGET_BPF()
#define LLVM_INITIALIZE_TARGET_Hexagon()
#define LLVM_INITIALIZE_TARGET_Lanai()
#define LLVM_INITIALIZE_TARGET_LoongArch()
#define LLVM_INITIALIZE_TARGET_Mips()
#define LLVM_INITIALIZE_TARGET_MSP430()
#define LLVM_INITIALIZE_TARGET_NVPTX()
#define LLVM_INITIALIZE_TARGET_PowerPC()
#define LLVM_INITIALIZE_TARGET_Sparc()
#define LLVM_INITIALIZE_TARGET_SystemZ()
#define LLVM_INITIALIZE_TARGET_VE()
#define LLVM_INITIALIZE_TARGET_XCore()
// Route every LLVM_TARGET(Name) use to the matching per-target macro above.
// Targets.def is presumably a list of LLVM_TARGET(...) entries, one per
// target enabled in the LLVM build — confirm against the LLVM configuration.
#define LLVM_TARGET(TargetName) LLVM_INITIALIZE_TARGET_##TargetName()
#include "llvm/Config/Targets.def"
}
//===----------------------------------------------------------------------===//
// __ __ ___________ ____ ____ ____ ______ __ __ //
// | | | | | ____\ \ / / \ \ / / / __ \ | | | | //
// | |__| | | |__ \ \/ / \ \/ / | | | | | | | | //
// | __ | | __| \_ _/ \_ _/ | | | | | | | | //
// | | | | | |____ | | __ | | | `--' | | `--' | //
// |__| |__| |_______| |__| (_ ) |__| \______/ \______/ //
// |/ //
//===----------------------------------------------------------------------===//
//
// Beware adding command-line flags here: IREE is a cross-compiler and can
// compile for multiple targets in a single invocation. Global flags added here
// apply to all targets with no way to override them from hosting applications
// that may need to programmatically set them per target and that's bad.
//
// Flags *must* be added to the LLVMTarget if they are target-specific and to
// LLVMTargetOptions if they apply to the whole backend.
// Registers every `iree-llvmcpu-*` command-line option with `binder`, each
// bound to the corresponding member of this options struct, under the
// "LLVMCPU HAL Target" option category.
//
// Side effect: calls initializeLLVMTargets() before registering anything.
void LLVMCPUTargetCLOptions::bindOptions(OptionsBinder &binder) {
// Initialize LLVM targets prior to the iree-llvmcpu-list-targets CLI opt,
// whose callback prints the registered targets.
initializeLLVMTargets();
// Function-local static, so all options registered here share one category.
static llvm::cl::OptionCategory category("LLVMCPU HAL Target");
// General flags.
// The list-targets callback prints the registered targets to llvm::outs()
// and then terminates the process with exit(0).
binder.opt<bool>(
"iree-llvmcpu-list-targets", listTargets, llvm::cl::cat(category),
llvm::cl::desc("Lists all registered targets that the LLVM backend can "
"generate code for."),
llvm::cl::ValueDisallowed, llvm::cl::callback([&](const bool &) {
llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs());
exit(0);
}));
// Target invariant flags: linker tool paths and linker artifact retention.
binder.opt<std::string>(
"iree-llvmcpu-system-linker-path", systemLinkerPath,
llvm::cl::cat(category),
llvm::cl::desc("Tool used to link system shared libraries produced by "
"IREE (for --iree-llvmcpu-link-embedded=false)."));
binder.opt<std::string>(
"iree-llvmcpu-embedded-linker-path", embeddedLinkerPath,
llvm::cl::cat(category),
llvm::cl::desc("Tool used to link embedded ELFs produced by IREE (for "
"--iree-llvmcpu-link-embedded=true)."));
binder.opt<std::string>(
"iree-llvmcpu-wasm-linker-path", wasmLinkerPath, llvm::cl::cat(category),
llvm::cl::desc("Tool used to link WebAssembly modules produced by "
"IREE (for --iree-llvmcpu-target-triple=wasm32-*)."));
binder.opt<bool>(
"iree-llvmcpu-keep-linker-artifacts", keepLinkerArtifacts,
llvm::cl::cat(category),
llvm::cl::desc("Keep LLVM linker target artifacts (.so/.dll/etc)"));
// Default device options: triple, CPU, CPU features and linking mode.
binder.opt<std::string>("iree-llvmcpu-target-triple", targetTriple,
llvm::cl::cat(category),
llvm::cl::desc("LLVM target machine triple."));
binder.opt<std::string>(
"iree-llvmcpu-target-cpu", targetCPU, llvm::cl::cat(category),
llvm::cl::desc(
"LLVM target machine CPU; use 'host' for your host native CPU."));
binder.opt<std::string>(
"iree-llvmcpu-target-cpu-features", targetCPUFeatures,
llvm::cl::cat(category),
llvm::cl::desc("LLVM target machine CPU features; use 'host' for your "
"host native CPU."));
binder.opt<bool>(
"iree-llvmcpu-link-embedded", linkEmbedded, llvm::cl::cat(category),
llvm::cl::desc("Links binaries into a platform-agnostic ELF to be "
"loaded by the embedded IREE ELF loader."));
binder.opt<bool>(
"iree-llvmcpu-link-static", linkStatic, llvm::cl::cat(category),
llvm::cl::desc(
"Links system libraries into binaries statically to isolate them "
"from platform dependencies needed at runtime"));
binder.opt<std::string>(
"iree-llvmcpu-static-library-output-path", staticLibraryOutputPath,
llvm::cl::cat(category),
llvm::cl::desc(
"Path to output static object (EX: '/path/to/static-library.o'). "
"This will produce the static library at the specified path along "
"with a similarly named '.h' file for static linking."));
binder.opt<bool>(
"iree-llvmcpu-debug-symbols", debugSymbols, llvm::cl::cat(category),
llvm::cl::desc("Generate and embed debug information (DWARF, PDB, etc)"));
// LLVM optimizer tuning toggles.
binder.opt<bool>("iree-llvmcpu-loop-interleaving", llvmLoopInterleaving,
llvm::cl::cat(category),
llvm::cl::desc("Enable LLVM loop interleaving opt"));
binder.opt<bool>("iree-llvmcpu-loop-vectorization", llvmLoopVectorization,
llvm::cl::cat(category),
llvm::cl::desc("Enable LLVM loop vectorization opt"));
binder.opt<bool>("iree-llvmcpu-loop-unrolling", llvmLoopUnrolling,
llvm::cl::cat(category),
llvm::cl::desc("Enable LLVM loop unrolling opt"));
binder.opt<bool>("iree-llvmcpu-slp-vectorization", llvmSLPVectorization,
llvm::cl::cat(category),
llvm::cl::desc("Enable LLVM SLP Vectorization opt"));
// Sanitizer selection; only "address" and "thread" are listed as accepted
// values here.
binder.opt<SanitizerKind>(
"iree-llvmcpu-sanitize", sanitizerKind, llvm::cl::cat(category),
llvm::cl::desc("Apply LLVM sanitize feature"),
llvm::cl::values(clEnumValN(SanitizerKind::kAddress, "address",
"Address sanitizer support"),
clEnumValN(SanitizerKind::kThread, "thread",
"Thread sanitizer support")));
// ABI selection.
binder.opt<std::string>(
"iree-llvmcpu-target-abi", targetABI, llvm::cl::cat(category),
llvm::cl::desc("LLVM target machine ABI; specify for -mabi"));
binder.opt<llvm::FloatABI::ABIType>(
"iree-llvmcpu-target-float-abi", targetFloatABI, llvm::cl::cat(category),
llvm::cl::desc("LLVM target codegen enables soft float abi e.g "
"-mfloat-abi=softfp"),
llvm::cl::values(
clEnumValN(llvm::FloatABI::Default, "default", "Default (softfp)"),
clEnumValN(llvm::FloatABI::Soft, "soft",
"Software floating-point emulation"),
clEnumValN(llvm::FloatABI::Hard, "hard",
"Hardware floating-point instructions")));
// Overrides for values otherwise derived from the target machine.
binder.opt<std::string>(
"iree-llvmcpu-target-data-layout", targetDataLayout,
llvm::cl::cat(category),
llvm::cl::desc("LLVM target machine data layout override."));
binder.opt<unsigned>("iree-llvmcpu-target-vector-width-in-bytes",
targetVectorWidthInBytes, llvm::cl::cat(category),
llvm::cl::desc("Overrides the native vector register "
"width (in bytes) of the target."));
// Microkernel configuration.
binder.opt<std::string>(
"iree-llvmcpu-enable-ukernels", enableUkernels, llvm::cl::cat(category),
llvm::cl::desc("Enables ukernels in the llvmcpu backend. May be "
"`default`, `none`, `all`, or a comma-separated list of "
"specific unprefixed ukernels to enable, e.g. `mmt4d`."));
binder.opt<bool>(
"iree-llvmcpu-link-ukernel-bitcode", linkUKernelBitcode,
llvm::cl::cat(category),
llvm::cl::desc(
"Link ukernel bitcode libraries into generated executables"));
}
// Builds the LLVMTargetOptions described by the bound command-line values.
//
// Linker paths and artifact retention are copied directly. The default target
// is created from the triple / CPU / CPU features / link-embedded flags and
// then has the remaining per-target flags applied before its target-machine
// defaults are populated.
//
// Side effect: when no triple was given, `targetTriple` is set to the process
// triple and stays that way after this call.
//
// If target creation yields no value, an error is printed to llvm::errs() and
// the flags are applied to the default-constructed target inside the returned
// options.
LLVMTargetOptions LLVMCPUTargetCLOptions::getTargetOptions() {
  LLVMTargetOptions targetOptions;

  // Target invariant settings.
  targetOptions.systemLinkerPath = systemLinkerPath;
  targetOptions.embeddedLinkerPath = embeddedLinkerPath;
  targetOptions.wasmLinkerPath = wasmLinkerPath;
  targetOptions.keepLinkerArtifacts = keepLinkerArtifacts;

  // Default to compiling for the machine we are running on.
  if (targetTriple.empty()) {
    targetTriple = llvm::sys::getProcessTriple();
  }

  std::optional<LLVMTarget> maybeTarget = LLVMTarget::create(
      targetTriple, targetCPU, targetCPUFeatures, linkEmbedded);
  if (maybeTarget) {
    targetOptions.target = *maybeTarget;
  } else {
    // The literals are concatenated by the compiler, so the separating space
    // has to be part of one of them.
    llvm::errs() << "Inconsistency in iree-llvmcpu-target-cpu-* command-line "
                    "flags. The target CPU is not properly defined.\n";
  }

  // Apply the remaining per-target flags on top of the created target.
  LLVMTarget &target = targetOptions.target;
  target.linkStatic = linkStatic;
  target.staticLibraryOutput = staticLibraryOutputPath;
  target.debugSymbols = debugSymbols;
  target.pipelineTuningOptions.LoopInterleaving = llvmLoopInterleaving;
  target.pipelineTuningOptions.LoopVectorization = llvmLoopVectorization;
  target.pipelineTuningOptions.LoopUnrolling = llvmLoopUnrolling;
  target.pipelineTuningOptions.SLPVectorization = llvmSLPVectorization;
  target.sanitizerKind = sanitizerKind;
  target.llvmTargetOptions.MCOptions.ABIName = targetABI;
  target.llvmTargetOptions.FloatABIType = targetFloatABI;
  target.dataLayout = targetDataLayout;
  target.vectorWidthInBytes = targetVectorWidthInBytes;
  target.ukernels = enableUkernels;
  target.linkUkernelBitcode = linkUKernelBitcode;

  // Must run after the overrides above so that user-provided data layout and
  // vector width values are seen when deciding what to derive.
  target.populateDefaultsFromTargetMachine();
  return targetOptions;
}
} // namespace mlir::iree_compiler::IREE::HAL