| // Copyright 2020 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include "compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h" |
| |
| #include <mutex> |
| |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/MC/TargetRegistry.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include "llvm/TargetParser/Host.h" |
| #include "llvm/TargetParser/RISCVTargetParser.h" |
| #include "llvm/TargetParser/SubtargetFeature.h" |
| #include "llvm/TargetParser/Triple.h" |
| #include "llvm/TargetParser/X86TargetParser.h" |
| #include "mlir/IR/Builders.h" |
| |
| namespace mlir::iree_compiler::IREE::HAL { |
| |
| namespace { |
| |
| bool resolveCPUAndCPUFeatures(llvm::StringRef inCpu, |
| llvm::StringRef inCpuFeatures, |
| const llvm::Triple &triple, std::string &outCpu, |
| std::string &outCpuFeatures) { |
| // Resolve "host" |
| if (inCpu == "host" || inCpuFeatures == "host") { |
| // If either Cpu or CpuFeatures is "host", the other must be either also |
| // host or the default value. |
| bool isCpuHostOrDefault = |
| inCpu.empty() || inCpu == "host" || inCpu == "generic"; |
| bool isCpuFeaturesHostOrDefault = |
| inCpuFeatures.empty() || inCpuFeatures == "host"; |
| if (!(isCpuHostOrDefault && isCpuFeaturesHostOrDefault)) { |
| llvm::errs() |
| << "error: If either cpu or CpuFeatures is `host`, the other must " |
| "be either also `host` or the default value\n"; |
| return false; |
| } |
| outCpu = triple.isX86() ? llvm::sys::getHostCPUName().str() : ""; |
| llvm::SubtargetFeatures features; |
| for (auto &feature : llvm::sys::getHostCPUFeatures()) { |
| features.AddFeature(feature.first(), feature.second); |
| } |
| outCpuFeatures = features.getString(); |
| } else { |
| outCpu = inCpu; |
| outCpuFeatures = inCpuFeatures; |
| } |
| |
| // Target-specific CPU feature tweaks that we need unconditionally. |
| if (triple.isAArch64()) { |
| llvm::SubtargetFeatures targetCpuFeatures(outCpuFeatures); |
| // x18 is platform-reserved per the Aarch64 procedure call specification. |
| targetCpuFeatures.AddFeature("reserve-x18", true); |
| outCpuFeatures = targetCpuFeatures.getString(); |
| } |
| |
| if (outCpu.empty() || inCpu == "host" || inCpu == "generic" || |
| inCpu.starts_with("generic-")) { |
| return true; |
| } |
| // If CPU is non-host and non-generic then we need to populate the |
| // corresponding features. |
| llvm::SubtargetFeatures targetCpuFeatures(outCpuFeatures); |
| auto addCpuFeatures = [&](const auto &getFeaturesForCPU, |
| auto &cpuFeatureList) { |
| getFeaturesForCPU(outCpu, cpuFeatureList, false); |
| for (const auto &feature : cpuFeatureList) { |
| targetCpuFeatures.AddFeature(feature); |
| } |
| }; |
| if (triple.isX86()) { |
| llvm::SmallVector<llvm::StringRef> cpuFeatureList; |
| addCpuFeatures(llvm::X86::getFeaturesForCPU, cpuFeatureList); |
| } else if (triple.isRISCV64()) { |
| llvm::SmallVector<std::string> cpuFeatureList; |
| addCpuFeatures(llvm::RISCV::getFeaturesForCPU, cpuFeatureList); |
| } else { |
| llvm::errs() |
| << "error: Resolution of target CPU to target CPU features is not " |
| "implemented on " |
| "this target architecture. Pass explicit CPU features " |
| "instead of a CPU " |
| "on this architecture, or implement that.\n"; |
| return false; |
| } |
| outCpuFeatures = targetCpuFeatures.getString(); |
| return true; |
| } |
| |
| } // namespace |
| |
| LLVMTarget::LLVMTarget() { |
| // LLVM loop optimization options. |
| pipelineTuningOptions.LoopInterleaving = DEFAULT_LOOP_INTERLEAVING; |
| pipelineTuningOptions.LoopVectorization = DEFAULT_LOOP_VECTORIZATION; |
| pipelineTuningOptions.LoopUnrolling = DEFAULT_LOOP_UNROLLING; |
| |
| // LLVM SLP Auto vectorizer. |
| pipelineTuningOptions.SLPVectorization = DEFAULT_SLP_VECTORIZATION; |
| |
| // LLVM optimization levels. |
| // TODO(benvanik): add an option for this. |
| optimizerOptLevel = llvm::OptimizationLevel::O2; |
| codeGenOptLevel = llvm::CodeGenOptLevel::Aggressive; |
| llvmTargetOptions.FloatABIType = DEFAULT_FLOAT_ABI; |
| |
| // Force `-ffunction-sections` so we can strip unused code. |
| llvmTargetOptions.FunctionSections = true; |
| llvmTargetOptions.DataSections = true; |
| llvmTargetOptions.UniqueSectionNames = true; |
| } |
| |
| std::optional<LLVMTarget> LLVMTarget::create(std::string_view triple, |
| std::string_view cpu, |
| std::string_view cpuFeatures, |
| bool requestLinkEmbedded) { |
| LLVMTarget target; |
| target.linkEmbedded = requestLinkEmbedded; |
| |
| target.triple = triple; |
| llvm::Triple targetTriple(target.triple); |
| // Special casing if linkEmbedded. |
| if (targetTriple.isWasm()) { |
| // The embedded ELF loader is not supported on WebAssembly, so force it off. |
| target.linkEmbedded = false; |
| } |
| if (target.linkEmbedded) { |
| // Force the triple to something compatible with embedded linking. |
| targetTriple.setVendor(llvm::Triple::VendorType::UnknownVendor); |
| targetTriple.setEnvironment(llvm::Triple::EnvironmentType::EABI); |
| targetTriple.setOS(llvm::Triple::OSType::UnknownOS); |
| targetTriple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF); |
| target.triple = targetTriple.str(); |
| } |
| if (!resolveCPUAndCPUFeatures(cpu, cpuFeatures, llvm::Triple(triple), |
| target.cpu, target.cpuFeatures)) { |
| // Something bad happened, and our target might not be what the user expects |
| // but we need to continue to avoid breaking existing users. Hopefully |
| // resolveCPUAndCPUFeatures logged a helpful error already. |
| } |
| |
| return target; |
| } |
| |
| std::optional<LLVMTarget> LLVMTarget::createForHost() { |
| auto target = |
| LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host", |
| /*cpuFeatures=*/"host", |
| /*requestLinkEmbedded=*/true); |
| if (target) |
| target->populateDefaultsFromTargetMachine(); |
| return target; |
| } |
| |
| void LLVMTarget::print(llvm::raw_ostream &os) const { |
| os << "LLVMTarget{\n" |
| << " triple=" << triple << ", cpu=" << cpu |
| << ", cpuFeatures=" << cpuFeatures << "\n" |
| << " dataLayout=" << dataLayout << "\n" |
| << " vectorWidthInBytes=" << vectorWidthInBytes << "\n" |
| << " linkEmbedded=" << linkEmbedded << "\n" |
| << " debugSymbols=" << debugSymbols << "\n" |
| << " sanitizer=" << static_cast<int>(sanitizerKind) << "\n" |
| << " staticLibraryOutput=" << staticLibraryOutput << "\n" |
| << " linkStatic=" << linkStatic << "\n" |
| << " pipelineTuningOptions={\n" |
| << " LoopInterleaving=" << pipelineTuningOptions.LoopInterleaving |
| << "\n" |
| << " LoopVectorization=" << pipelineTuningOptions.LoopVectorization |
| << "\n" |
| << " LoopUnrolling=" << pipelineTuningOptions.LoopUnrolling << "\n" |
| << " SLPVectorization=" << pipelineTuningOptions.SLPVectorization |
| << "\n" |
| << " }, llvmTargetOptions={\n" |
| << " FloatABIType=" << static_cast<int>(llvmTargetOptions.FloatABIType) |
| << "\n" |
| << " }\n" |
| << " ukernels=" << ukernels << "\n" |
| << " linkUkernelBitcode=" << linkUkernelBitcode << "\n" |
| << "}\n"; |
| } |
| |
| void LLVMTarget::storeToConfigAttrs(MLIRContext *context, |
| SmallVector<NamedAttribute> &config) const { |
| Builder b(context); |
| auto addString = [&](StringRef name, StringRef value) { |
| config.emplace_back(b.getStringAttr(name), b.getStringAttr(value)); |
| }; |
| auto addBool = [&](StringRef name, bool value) { |
| config.emplace_back(b.getStringAttr(name), b.getBoolAttr(value)); |
| }; |
| auto addInt64 = [&](StringRef name, int64_t value) { |
| config.emplace_back(b.getStringAttr(name), b.getI64IntegerAttr(value)); |
| }; |
| |
| addString("target_triple", triple); |
| addString("cpu", cpu); |
| addString("cpu_features", cpuFeatures); |
| if (!dataLayout.empty()) { |
| addString("data_layout", dataLayout); |
| } |
| if (vectorWidthInBytes != DEFAULT_VECTOR_WIDTH_IN_BYTES) { |
| addInt64("native_vector_size", vectorWidthInBytes); |
| } |
| if (linkEmbedded != DEFAULT_LINK_EMBEDDED) { |
| addBool("link_embedded", linkEmbedded); |
| } |
| if (debugSymbols != DEFAULT_DEBUG_SYMBOLS) { |
| addBool("debug_symbols", debugSymbols); |
| } |
| if (linkStatic != DEFAULT_LINK_STATIC) { |
| addBool("link_static", linkStatic); |
| } |
| if (sanitizerKind != DEFAULT_SANITIZER_KIND) { |
| switch (sanitizerKind) { |
| case SanitizerKind::kNone: |
| addString("sanitizer", "none"); |
| break; |
| case SanitizerKind::kAddress: |
| addString("sanitizer", "address"); |
| break; |
| case SanitizerKind::kThread: |
| addString("sanitizer", "thread"); |
| break; |
| } |
| } |
| if (!staticLibraryOutput.empty()) { |
| addString("static_library_output", staticLibraryOutput); |
| } |
| if (pipelineTuningOptions.LoopInterleaving != DEFAULT_LOOP_INTERLEAVING) |
| addBool("loop_interleaving", pipelineTuningOptions.LoopInterleaving); |
| if (pipelineTuningOptions.LoopVectorization != DEFAULT_LOOP_VECTORIZATION) |
| addBool("loop_vectorization", pipelineTuningOptions.LoopVectorization); |
| if (pipelineTuningOptions.LoopUnrolling != DEFAULT_LOOP_UNROLLING) |
| addBool("loop_unrolling", pipelineTuningOptions.LoopUnrolling); |
| if (pipelineTuningOptions.SLPVectorization != DEFAULT_SLP_VECTORIZATION) |
| addBool("slp_vectorization", pipelineTuningOptions.SLPVectorization); |
| if (!llvmTargetOptions.MCOptions.ABIName.empty()) |
| addString("target_abi", llvmTargetOptions.MCOptions.ABIName); |
| if (llvmTargetOptions.FloatABIType != DEFAULT_FLOAT_ABI) { |
| switch (llvmTargetOptions.FloatABIType) { |
| case llvm::FloatABI::Default: |
| addString("float_abi", "default"); |
| break; |
| case llvm::FloatABI::Soft: |
| addString("float_abi", "soft"); |
| break; |
| case llvm::FloatABI::Hard: |
| addString("float_abi", "hard"); |
| break; |
| } |
| } |
| if (ukernels.compare(DEFAULT_ENABLE_UKERNELS) != 0) |
| addString("ukernels", ukernels); |
| if (linkUkernelBitcode != DEFAULT_LINK_UKERNEL_BITCODE) |
| addBool("link_ukernel_bitcode", linkUkernelBitcode); |
| } |
| |
| std::optional<LLVMTarget> |
| LLVMTarget::loadFromConfigAttr(Location loc, DictionaryAttr config, |
| const LLVMTarget &defaultTarget) { |
| bool hasFailures = false; |
| auto getString = [&](StringRef name, StringRef fallback, |
| bool required) -> StringRef { |
| Attribute attr = config.get(name); |
| if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) { |
| return sattr.strref(); |
| } else { |
| if (required) { |
| hasFailures = true; |
| emitError(loc) << "executable config '" << name |
| << "' required but not present on attribute"; |
| } |
| return fallback; |
| } |
| }; |
| auto getOptionalString = [&](StringRef name) -> std::optional<StringRef> { |
| Attribute attr = config.get(name); |
| if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) { |
| return sattr.strref(); |
| } else if (attr) { |
| hasFailures = true; |
| emitError(loc) << "executable config '" << name |
| << "' requires string but got " << attr; |
| } |
| return {}; |
| }; |
| auto getBool = [&](StringRef name, bool fallback) -> bool { |
| Attribute attr = config.get(name); |
| if (auto battr = llvm::dyn_cast_if_present<BoolAttr>(attr)) { |
| return battr.getValue(); |
| } else if (attr) { |
| hasFailures = true; |
| emitError(loc) << "executable config '" << name |
| << "' requires bool but got " << attr; |
| } |
| return fallback; |
| }; |
| auto getInt64 = [&](StringRef name, int64_t fallback) -> int64_t { |
| Attribute attr = config.get(name); |
| if (auto iattr = llvm::dyn_cast_if_present<IntegerAttr>(attr)) { |
| return iattr.getValue().getSExtValue(); |
| } else if (attr) { |
| hasFailures = true; |
| emitError(loc) << "executable config '" << name |
| << "' requires i64 but got " << attr; |
| } |
| return fallback; |
| }; |
| |
| LLVMTarget target; |
| |
| // Constructor arguments. |
| auto triple = getOptionalString("target_triple"); |
| auto cpu = getOptionalString("cpu"); |
| auto cpuFeatures = getOptionalString("cpu_features"); |
| bool linkEmbedded = getBool("link_embedded", DEFAULT_LINK_EMBEDDED); |
| if (triple || cpu || cpuFeatures) { |
| if (!triple) { |
| emitError(loc) << "executable config 'cpu' or 'cpu_features' must be " |
| "accompanied by 'target_triple'"; |
| return {}; |
| } |
| std::optional<LLVMTarget> maybeTarget = |
| LLVMTarget::create(*triple, cpu ? *cpu : "generic", |
| cpuFeatures ? *cpuFeatures : "", linkEmbedded); |
| if (!maybeTarget) { |
| return {}; |
| } |
| target.copy(*maybeTarget); |
| } else { |
| target.copy(defaultTarget); |
| } |
| |
| target.dataLayout = getString("data_layout", DEFAULT_DATA_LAYOUT, false); |
| target.vectorWidthInBytes = |
| getInt64("native_vector_size", DEFAULT_VECTOR_WIDTH_IN_BYTES); |
| |
| target.debugSymbols = getBool("debug_symbols", DEFAULT_DEBUG_SYMBOLS); |
| target.linkStatic = getBool("link_static", DEFAULT_LINK_STATIC); |
| auto sanitizer = getOptionalString("sanitizer"); |
| if (sanitizer) { |
| if (sanitizer == "none") |
| target.sanitizerKind = SanitizerKind::kNone; |
| else if (sanitizer == "address") |
| target.sanitizerKind = SanitizerKind::kAddress; |
| else if (sanitizer == "thread") |
| target.sanitizerKind = SanitizerKind::kThread; |
| else { |
| emitError(loc) << "executable config unexpected value for 'sanitizer': " |
| << *sanitizer; |
| return {}; |
| } |
| } |
| target.staticLibraryOutput = getString("static_library_output", "", false); |
| |
| target.pipelineTuningOptions.LoopInterleaving = getBool( |
| "loop_interleaving", target.pipelineTuningOptions.LoopInterleaving); |
| target.pipelineTuningOptions.LoopVectorization = getBool( |
| "loop_vectorization", target.pipelineTuningOptions.LoopVectorization); |
| target.pipelineTuningOptions.LoopUnrolling = |
| getBool("loop_unrolling", target.pipelineTuningOptions.LoopUnrolling); |
| target.pipelineTuningOptions.SLPVectorization = getBool( |
| "slp_vectorization", target.pipelineTuningOptions.SLPVectorization); |
| auto targetAbi = getOptionalString("target_abi"); |
| if (targetAbi) |
| target.llvmTargetOptions.MCOptions.ABIName = *targetAbi; |
| auto floatAbi = getOptionalString("float_abi"); |
| if (floatAbi) { |
| if (floatAbi == "default") |
| target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Default; |
| else if (floatAbi == "soft") |
| target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Default; |
| else if (floatAbi == "hard") |
| target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Default; |
| else { |
| emitError(loc) << "executable config unexpected value for 'float_abi'"; |
| return {}; |
| } |
| } |
| |
| target.ukernels = getString("ukernels", target.ukernels, false); |
| target.linkUkernelBitcode = |
| getBool("link_ukernel_bitcode", target.linkUkernelBitcode); |
| |
| if (hasFailures) { |
| return {}; |
| } |
| target.populateDefaultsFromTargetMachine(); |
| return target; |
| } |
| |
| void LLVMTarget::populateDefaultsFromTargetMachine() { |
| // We may need the target machine for certain default values. |
| std::unique_ptr<llvm::TargetMachine> cachedTargetMachine; |
| auto getTargetMachine = [&]() { |
| if (!cachedTargetMachine) { |
| cachedTargetMachine = createTargetMachine(*this); |
| // TODO(#13988): proper error propagation. This is a common user scenario. |
| assert(cachedTargetMachine && "createTargetMachine failed"); |
| } |
| return cachedTargetMachine.get(); |
| }; |
| |
| if (dataLayout.empty()) { |
| auto targetDataLayout = getTargetMachine()->createDataLayout(); |
| dataLayout = targetDataLayout.getStringRepresentation(); |
| } |
| |
| if (vectorWidthInBytes == DEFAULT_VECTOR_WIDTH_IN_BYTES) { |
| auto targetMachine = getTargetMachine(); |
| auto targetFeatures = targetMachine->getTargetFeatureString(); |
| |
| // The only way to get the real TTI is to create a function using it. |
| // LLVM's TargetMachine and related APIs are terrible. Absolutely yuck. |
| // Note that we use the data layout set above to either what the user |
| // specified or what the target machine returned. |
| // |
| // If anyone comes across this: it'd be great if getTargetTransformInfo |
| // could be called without requiring a function. |
| llvm::LLVMContext llvmContext; |
| auto llvmModule = |
| std::make_unique<llvm::Module>("dummy_module", llvmContext); |
| llvmModule->setDataLayout(dataLayout); |
| llvm::Function *dummyFunc = llvm::Function::Create( |
| llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), false), |
| llvm::GlobalValue::ExternalLinkage, "dummy_func", *llvmModule); |
| if (targetFeatures.contains("avx512")) { |
| // Always override the vector with to 512 on systems with avx512. |
| // @dcaballe says: |
| // > in ML the frequency throttling that happens when using 512-bit |
| // > register doesn't have an overall negative impact in performance due |
| // > to the high computational density of the workloads, even on skylake |
| // > where the throttling was really bad |
| dummyFunc->addFnAttr("prefer-vector-width", "512"); |
| } |
| auto targetTTI = targetMachine->getTargetTransformInfo(*dummyFunc); |
| |
| // Query the vector width from TTI. |
| unsigned ttiVectorWidthInBytes = |
| targetTTI.getRegisterBitWidth( |
| llvm::TargetTransformInfo::RGK_FixedWidthVector) / |
| 8; |
| vectorWidthInBytes = ttiVectorWidthInBytes > 1 ? ttiVectorWidthInBytes : 16; |
| } |
| } |
| |
| std::unique_ptr<llvm::TargetMachine> |
| createTargetMachine(const LLVMTarget &target) { |
| std::string errorMessage; |
| auto llvmTarget = |
| llvm::TargetRegistry::lookupTarget(target.getTriple(), errorMessage); |
| if (!llvmTarget) |
| return nullptr; |
| std::unique_ptr<llvm::TargetMachine> machine(llvmTarget->createTargetMachine( |
| target.getTriple(), target.getCpu() /* cpu e.g k8 */, |
| target.getCpuFeatures() /* cpu features e.g avx512f */, |
| target.llvmTargetOptions, llvm::Reloc::Model::PIC_, {}, |
| target.codeGenOptLevel, |
| /*JIT=*/false)); |
| return machine; |
| } |
| |
| static void initializeLLVMTargets() { |
| // Dynamically do preprocessor dispatch to initialize only targets that we |
| // care about if they are enabled. Unfortunately, the way the LLVM macros |
| // for this are set up and the inability to do a conditional within a macro |
| // means that we have to syntactically have a macro for every possible |
| // target we care about. There are more robust ways to do this but they all |
| // require build support, which is a pain to manage across platforms. |
| // |
| // See comments below. |
| #define LLVM_INITIALIZE_GENERIC(TargetName) \ |
| LLVMInitialize##TargetName##Target(); \ |
| LLVMInitialize##TargetName##TargetMC(); \ |
| LLVMInitialize##TargetName##TargetInfo(); \ |
| LLVMInitialize##TargetName##AsmPrinter(); \ |
| LLVMInitialize##TargetName##AsmParser(); |
| |
| // CPU targets that we care about and have hard-linked against are here. |
| // They delegate to the generic initialize above. These must all be added |
| // to the build file or you will get undefined symbol errors at link time. |
| #define LLVM_INITIALIZE_TARGET_AArch64() LLVM_INITIALIZE_GENERIC(AArch64) |
| #define LLVM_INITIALIZE_TARGET_ARM() LLVM_INITIALIZE_GENERIC(ARM) |
| #define LLVM_INITIALIZE_TARGET_RISCV() LLVM_INITIALIZE_GENERIC(RISCV) |
| #define LLVM_INITIALIZE_TARGET_X86() LLVM_INITIALIZE_GENERIC(X86) |
| #define LLVM_INITIALIZE_TARGET_WebAssembly() \ |
| LLVM_INITIALIZE_GENERIC(WebAssembly) |
| |
| // We must no-op the name of each target we don't care about. This is annoying, |
| // but targets aren't created every day and isn't the end of the world. The |
| // error messages when missing are quite clear and you just add a line here. |
| #define LLVM_INITIALIZE_TARGET_AMDGPU() |
| #define LLVM_INITIALIZE_TARGET_AVR() |
| #define LLVM_INITIALIZE_TARGET_BPF() |
| #define LLVM_INITIALIZE_TARGET_Hexagon() |
| #define LLVM_INITIALIZE_TARGET_Lanai() |
| #define LLVM_INITIALIZE_TARGET_LoongArch() |
| #define LLVM_INITIALIZE_TARGET_Mips() |
| #define LLVM_INITIALIZE_TARGET_MSP430() |
| #define LLVM_INITIALIZE_TARGET_NVPTX() |
| #define LLVM_INITIALIZE_TARGET_PowerPC() |
| #define LLVM_INITIALIZE_TARGET_Sparc() |
| #define LLVM_INITIALIZE_TARGET_SystemZ() |
| #define LLVM_INITIALIZE_TARGET_VE() |
| #define LLVM_INITIALIZE_TARGET_XCore() |
| |
| #define LLVM_TARGET(TargetName) LLVM_INITIALIZE_TARGET_##TargetName() |
| #include "llvm/Config/Targets.def" |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // __ __ ___________ ____ ____ ____ ______ __ __ // |
| // | | | | | ____\ \ / / \ \ / / / __ \ | | | | // |
| // | |__| | | |__ \ \/ / \ \/ / | | | | | | | | // |
| // | __ | | __| \_ _/ \_ _/ | | | | | | | | // |
| // | | | | | |____ | | __ | | | `--' | | `--' | // |
| // |__| |__| |_______| |__| (_ ) |__| \______/ \______/ // |
| // |/ // |
| //===----------------------------------------------------------------------===// |
| // |
| // Beware adding command-line flags here: IREE is a cross-compiler and can |
| // compile for multiple targets in a single invocation. Global flags added here |
| // apply to all targets with no way to override them from hosting applications |
| // that may need to programmatically set them per target and that's bad. |
| // |
| // Flags *must* be added to the LLVMTarget if they are target-specific and |
| // LLVMTargetOptions if they apply to the whole backend. |
| |
| void LLVMCPUTargetCLOptions::bindOptions(OptionsBinder &binder) { |
| // Initialize LLVM targets prior to the iree-llvmcpu-list-targets CLI opt. |
| initializeLLVMTargets(); |
| |
| static llvm::cl::OptionCategory category("LLVMCPU HAL Target"); |
| |
| // General flags. |
| binder.opt<bool>( |
| "iree-llvmcpu-list-targets", listTargets, llvm::cl::cat(category), |
| llvm::cl::desc("Lists all registered targets that the LLVM backend can " |
| "generate code for."), |
| llvm::cl::ValueDisallowed, llvm::cl::callback([&](const bool &) { |
| llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs()); |
| exit(0); |
| })); |
| |
| // Target invariant flags. |
| binder.opt<std::string>( |
| "iree-llvmcpu-system-linker-path", systemLinkerPath, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Tool used to link system shared libraries produced by " |
| "IREE (for --iree-llvmcpu-link-embedded=false).")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-embedded-linker-path", embeddedLinkerPath, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Tool used to link embedded ELFs produced by IREE (for " |
| "--iree-llvmcpu-link-embedded=true).")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-wasm-linker-path", wasmLinkerPath, llvm::cl::cat(category), |
| llvm::cl::desc("Tool used to link WebAssembly modules produced by " |
| "IREE (for --iree-llvmcpu-target-triple=wasm32-*).")); |
| binder.opt<bool>( |
| "iree-llvmcpu-keep-linker-artifacts", keepLinkerArtifacts, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Keep LLVM linker target artifacts (.so/.dll/etc)")); |
| |
| // Default device options. |
| binder.opt<std::string>("iree-llvmcpu-target-triple", targetTriple, |
| llvm::cl::cat(category), |
| llvm::cl::desc("LLVM target machine triple.")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-target-cpu", targetCPU, llvm::cl::cat(category), |
| llvm::cl::desc( |
| "LLVM target machine CPU; use 'host' for your host native CPU.")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-target-cpu-features", targetCPUFeatures, |
| llvm::cl::cat(category), |
| llvm::cl::desc("LLVM target machine CPU features; use 'host' for your " |
| "host native CPU.")); |
| binder.opt<bool>( |
| "iree-llvmcpu-link-embedded", linkEmbedded, llvm::cl::cat(category), |
| llvm::cl::desc("Links binaries into a platform-agnostic ELF to be " |
| "loaded by the embedded IREE ELF loader.")); |
| binder.opt<bool>( |
| "iree-llvmcpu-link-static", linkStatic, llvm::cl::cat(category), |
| llvm::cl::desc( |
| "Links system libraries into binaries statically to isolate them " |
| "from platform dependencies needed at runtime")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-static-library-output-path", staticLibraryOutputPath, |
| llvm::cl::cat(category), |
| llvm::cl::desc( |
| "Path to output static object (EX: '/path/to/static-library.o'). " |
| "This will produce the static library at the specified path along " |
| "with a similarly named '.h' file for static linking.")); |
| binder.opt<bool>( |
| "iree-llvmcpu-debug-symbols", debugSymbols, llvm::cl::cat(category), |
| llvm::cl::desc("Generate and embed debug information (DWARF, PDB, etc)")); |
| binder.opt<bool>("iree-llvmcpu-loop-interleaving", llvmLoopInterleaving, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Enable LLVM loop interleaving opt")); |
| binder.opt<bool>("iree-llvmcpu-loop-vectorization", llvmLoopVectorization, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Enable LLVM loop vectorization opt")); |
| binder.opt<bool>("iree-llvmcpu-loop-unrolling", llvmLoopUnrolling, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Enable LLVM loop unrolling opt")); |
| binder.opt<bool>("iree-llvmcpu-slp-vectorization", llvmSLPVectorization, |
| llvm::cl::cat(category), |
| llvm::cl::desc("Enable LLVM SLP Vectorization opt")); |
| binder.opt<SanitizerKind>( |
| "iree-llvmcpu-sanitize", sanitizerKind, llvm::cl::cat(category), |
| llvm::cl::desc("Apply LLVM sanitize feature"), |
| llvm::cl::values(clEnumValN(SanitizerKind::kAddress, "address", |
| "Address sanitizer support"), |
| clEnumValN(SanitizerKind::kThread, "thread", |
| "Thread sanitizer support"))); |
| binder.opt<std::string>( |
| "iree-llvmcpu-target-abi", targetABI, llvm::cl::cat(category), |
| llvm::cl::desc("LLVM target machine ABI; specify for -mabi")); |
| binder.opt<llvm::FloatABI::ABIType>( |
| "iree-llvmcpu-target-float-abi", targetFloatABI, llvm::cl::cat(category), |
| llvm::cl::desc("LLVM target codegen enables soft float abi e.g " |
| "-mfloat-abi=softfp"), |
| llvm::cl::values( |
| clEnumValN(llvm::FloatABI::Default, "default", "Default (softfp)"), |
| clEnumValN(llvm::FloatABI::Soft, "soft", |
| "Software floating-point emulation"), |
| clEnumValN(llvm::FloatABI::Hard, "hard", |
| "Hardware floating-point instructions"))); |
| binder.opt<std::string>( |
| "iree-llvmcpu-target-data-layout", targetDataLayout, |
| llvm::cl::cat(category), |
| llvm::cl::desc("LLVM target machine data layout override.")); |
| binder.opt<unsigned>("iree-llvmcpu-target-vector-width-in-bytes", |
| targetVectorWidthInBytes, llvm::cl::cat(category), |
| llvm::cl::desc("Overrides the native vector register " |
| "width (in bytes) of the target.")); |
| binder.opt<std::string>( |
| "iree-llvmcpu-enable-ukernels", enableUkernels, llvm::cl::cat(category), |
| llvm::cl::desc("Enables ukernels in the llvmcpu backend. May be " |
| "`default`, `none`, `all`, or a comma-separated list of " |
| "specific unprefixed ukernels to enable, e.g. `mmt4d`.")); |
| binder.opt<bool>( |
| "iree-llvmcpu-link-ukernel-bitcode", linkUKernelBitcode, |
| llvm::cl::cat(category), |
| llvm::cl::desc( |
| "Link ukernel bitcode libraries into generated executables")); |
| } |
| |
| LLVMTargetOptions LLVMCPUTargetCLOptions::getTargetOptions() { |
| LLVMTargetOptions targetOptions; |
| targetOptions.systemLinkerPath = systemLinkerPath; |
| targetOptions.embeddedLinkerPath = embeddedLinkerPath; |
| targetOptions.wasmLinkerPath = wasmLinkerPath; |
| targetOptions.keepLinkerArtifacts = keepLinkerArtifacts; |
| |
| if (targetTriple.empty()) { |
| targetTriple = llvm::sys::getProcessTriple(); |
| } |
| |
| std::optional<LLVMTarget> maybeTarget = LLVMTarget::create( |
| targetTriple, targetCPU, targetCPUFeatures, linkEmbedded); |
| if (maybeTarget) { |
| targetOptions.target = *maybeTarget; |
| } else { |
| llvm::errs() << "Inconsistency in iree-llvmcpu-target-cpu-* command-line" |
| "flags. The target CPU is not properly defined.\n"; |
| } |
| LLVMTarget &target = targetOptions.target; |
| target.linkStatic = linkStatic; |
| target.staticLibraryOutput = staticLibraryOutputPath; |
| target.debugSymbols = debugSymbols; |
| target.pipelineTuningOptions.LoopInterleaving = llvmLoopInterleaving; |
| target.pipelineTuningOptions.LoopVectorization = llvmLoopVectorization; |
| target.pipelineTuningOptions.LoopUnrolling = llvmLoopUnrolling; |
| target.pipelineTuningOptions.SLPVectorization = llvmSLPVectorization; |
| target.sanitizerKind = sanitizerKind; |
| target.llvmTargetOptions.MCOptions.ABIName = targetABI; |
| target.llvmTargetOptions.FloatABIType = targetFloatABI; |
| target.dataLayout = targetDataLayout; |
| target.vectorWidthInBytes = targetVectorWidthInBytes; |
| target.ukernels = enableUkernels; |
| target.linkUkernelBitcode = linkUKernelBitcode; |
| |
| target.populateDefaultsFromTargetMachine(); |
| return targetOptions; |
| } |
| |
| } // namespace mlir::iree_compiler::IREE::HAL |