// compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2020 The IREE Authors
 //
 // Licensed under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 #include "compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h"

 #include <mutex>

 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
 #include "mlir/IR/Builders.h"

 namespace mlir::iree_compiler::IREE::HAL {

 namespace {

 // Resolves the requested CPU name and CPU feature string for `triple` and
 // writes the results to `outCpu` / `outCpuFeatures`.
 //
 //  * If either input is "host", the host CPU features are queried from LLVM
 //    and, on x86 triples only, the host CPU name is used (other
 //    architectures get an empty CPU name). The other input must then also be
 //    "host" or left at its default (empty, or "generic" for the CPU).
 //  * Otherwise both inputs are passed through unchanged.
 //  * AArch64 triples always get the `reserve-x18` feature added.
 //  * A named CPU (non-empty, non-host, non-generic) is expanded into its
 //    feature list on x86 and 64-bit RISC-V; any other architecture is
 //    reported as unsupported.
 //
 // Returns false after printing an error to stderr. In the host-conflict case
 // the outputs are untouched; in the unsupported-architecture case they have
 // already been written.
 bool resolveCPUAndCPUFeatures(llvm::StringRef inCpu,
                               llvm::StringRef inCpuFeatures,
                               const llvm::Triple &triple, std::string &outCpu,
                               std::string &outCpuFeatures) {
   const bool cpuIsHost = inCpu == "host";
   const bool featuresAreHost = inCpuFeatures == "host";

   if (!cpuIsHost && !featuresAreHost) {
     // Nothing to resolve: take the request verbatim.
     outCpu = inCpu.str();
     outCpuFeatures = inCpuFeatures.str();
   } else {
     // One side asked for "host"; the other side may only be "host" as well or
     // its default value.
     const bool cpuCompatible =
         cpuIsHost || inCpu.empty() || inCpu == "generic";
     const bool featuresCompatible = featuresAreHost || inCpuFeatures.empty();
     if (!cpuCompatible || !featuresCompatible) {
       llvm::errs()
           << "error: If either cpu or CpuFeatures is `host`, the other must "
              "be either also `host` or the default value\n";
       return false;
     }

     // Only x86 uses the host CPU name; everything else relies on features.
     if (triple.isX86()) {
       outCpu = llvm::sys::getHostCPUName().str();
     } else {
       outCpu.clear();
     }

     llvm::SubtargetFeatures hostFeatures;
     for (auto &entry : llvm::sys::getHostCPUFeatures()) {
       hostFeatures.AddFeature(entry.first(), entry.second);
     }
     outCpuFeatures = hostFeatures.getString();
   }

   // Target-specific CPU feature tweaks that we need unconditionally.
   if (triple.isAArch64()) {
     // x18 is platform-reserved per the AArch64 procedure call specification.
     llvm::SubtargetFeatures withReservedX18(outCpuFeatures);
     withReservedX18.AddFeature("reserve-x18", true);
     outCpuFeatures = withReservedX18.getString();
   }

   // Host and generic CPUs (and an empty CPU name) need no feature expansion.
   const bool cpuIsGeneric =
       inCpu == "generic" || inCpu.starts_with("generic-");
   if (outCpu.empty() || cpuIsHost || cpuIsGeneric) {
     return true;
   }

   // A specific CPU was named: add the features LLVM associates with it on top
   // of whatever was requested explicitly.
   llvm::SubtargetFeatures expandedFeatures(outCpuFeatures);
   if (triple.isX86()) {
     llvm::SmallVector<llvm::StringRef> cpuFeatureNames;
     llvm::X86::getFeaturesForCPU(outCpu, cpuFeatureNames, false);
     for (const auto &name : cpuFeatureNames) {
       expandedFeatures.AddFeature(name);
     }
   } else if (triple.isRISCV64()) {
     llvm::SmallVector<std::string> cpuFeatureNames;
     llvm::RISCV::getFeaturesForCPU(outCpu, cpuFeatureNames, false);
     for (const auto &name : cpuFeatureNames) {
       expandedFeatures.AddFeature(name);
     }
   } else {
     llvm::errs()
         << "error: Resolution of target CPU to target CPU features is not "
            "implemented on "
            "this target architecture. Pass explicit CPU features "
            "instead of a CPU "
            "on this architecture, or implement that.\n";
     return false;
   }
   outCpuFeatures = expandedFeatures.getString();
   return true;
 }

 } // namespace

 // Default-constructs a target carrying the baseline LLVM settings: the
 // DEFAULT_* loop/SLP tuning flags and float ABI, O2 optimization with
 // aggressive code generation, and per-symbol sections.
 LLVMTarget::LLVMTarget() {
   // Optimization levels for the optimizer pipeline and the code generator.
   // TODO(benvanik): add an option for this.
   optimizerOptLevel = llvm::OptimizationLevel::O2;
   codeGenOptLevel = llvm::CodeGenOptLevel::Aggressive;

   // Loop optimizations and the SLP auto-vectorizer.
   pipelineTuningOptions.LoopInterleaving = DEFAULT_LOOP_INTERLEAVING;
   pipelineTuningOptions.LoopVectorization = DEFAULT_LOOP_VECTORIZATION;
   pipelineTuningOptions.LoopUnrolling = DEFAULT_LOOP_UNROLLING;
   pipelineTuningOptions.SLPVectorization = DEFAULT_SLP_VECTORIZATION;

   // Floating-point ABI.
   llvmTargetOptions.FloatABIType = DEFAULT_FLOAT_ABI;

   // Force `-ffunction-sections` (plus data sections with unique names) so we
   // can strip unused code.
   llvmTargetOptions.FunctionSections = true;
   llvmTargetOptions.DataSections = true;
   llvmTargetOptions.UniqueSectionNames = true;
 }

 // Creates a target for `triple` with the CPU name and features resolved from
 // `cpu` / `cpuFeatures` (either may be "host").
 //
 // `requestLinkEmbedded` asks for embedded-ELF linking. It is ignored for
 // WebAssembly triples; when honored, the stored triple is rewritten to an
 // unknown-vendor, unknown-OS, EABI, ELF triple.
 //
 // A failure to resolve the CPU is not treated as fatal here: the target is
 // returned regardless.
 std::optional<LLVMTarget> LLVMTarget::create(std::string_view triple,
                                              std::string_view cpu,
                                              std::string_view cpuFeatures,
                                              bool requestLinkEmbedded) {
   LLVMTarget target;
   target.triple = triple;
   llvm::Triple targetTriple(target.triple);

   // The embedded ELF loader is not supported on WebAssembly, so the request
   // is forced off for wasm triples.
   target.linkEmbedded = requestLinkEmbedded && !targetTriple.isWasm();

   if (target.linkEmbedded) {
     // Force the triple to something compatible with embedded linking.
     targetTriple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF);
     targetTriple.setOS(llvm::Triple::OSType::UnknownOS);
     targetTriple.setEnvironment(llvm::Triple::EnvironmentType::EABI);
     targetTriple.setVendor(llvm::Triple::VendorType::UnknownVendor);
     target.triple = targetTriple.str();
   }

   // CPU resolution uses the triple as originally requested. If it fails the
   // target might not be what the user expects, but we continue to avoid
   // breaking existing users; resolveCPUAndCPUFeatures is expected to have
   // logged a helpful error already.
   const bool resolved = resolveCPUAndCPUFeatures(
       cpu, cpuFeatures, llvm::Triple(triple), target.cpu, target.cpuFeatures);
   (void)resolved;

   return target;
 }

 std::optional<LLVMTarget> LLVMTarget::createForHost() {
   auto target =
       LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host",
                          /*cpuFeatures=*/"host",
                          /*requestLinkEmbedded=*/true);
   if (target)
     target->populateDefaultsFromTargetMachine();
   return target;
 }

 // Writes a multi-line, human-readable dump of this target to `os`. Enum
 // values (sanitizer kind, float ABI) are printed as their integer values.
 void LLVMTarget::print(llvm::raw_ostream &os) const {
   const auto &tuning = pipelineTuningOptions;

   os << "LLVMTarget{\n";

   // Identity of the target.
   os << "  triple=" << triple << ", cpu=" << cpu
      << ", cpuFeatures=" << cpuFeatures << "\n";
   os << "  dataLayout=" << dataLayout << "\n";
   os << "  vectorWidthInBytes=" << vectorWidthInBytes << "\n";

   // Linking, debugging and instrumentation.
   os << "  linkEmbedded=" << linkEmbedded << "\n";
   os << "  debugSymbols=" << debugSymbols << "\n";
   os << "  sanitizer=" << static_cast<int>(sanitizerKind) << "\n";
   os << "  staticLibraryOutput=" << staticLibraryOutput << "\n";
   os << "  linkStatic=" << linkStatic << "\n";

   // LLVM pipeline tuning.
   os << "  pipelineTuningOptions={\n";
   os << "    LoopInterleaving=" << tuning.LoopInterleaving << "\n";
   os << "    LoopVectorization=" << tuning.LoopVectorization << "\n";
   os << "    LoopUnrolling=" << tuning.LoopUnrolling << "\n";
   os << "    SLPVectorization=" << tuning.SLPVectorization << "\n";

   // LLVM target options.
   os << "  }, llvmTargetOptions={\n";
   os << "    FloatABIType=" << static_cast<int>(llvmTargetOptions.FloatABIType)
      << "\n";
   os << "  }\n";

   // Microkernels.
   os << "  ukernels=" << ukernels << "\n";
   os << "  linkUkernelBitcode=" << linkUkernelBitcode << "\n";

   os << "}\n";
 }

 // Appends this target's configuration to `config` as named attributes.
 //
 // `target_triple`, `cpu` and `cpu_features` are always written. Every other
 // key is written only when its value differs from the corresponding
 // DEFAULT_* constant (or, for strings without such a constant, when it is
 // non-empty).
 void LLVMTarget::storeToConfigAttrs(MLIRContext *context,
                                     SmallVector<NamedAttribute> &config) const {
   Builder b(context);

   // All entries are keyed by a string attribute; only the value kind varies.
   auto put = [&](StringRef key, Attribute value) {
     config.emplace_back(b.getStringAttr(key), value);
   };
   auto putString = [&](StringRef key, StringRef value) {
     put(key, b.getStringAttr(value));
   };
   auto putBool = [&](StringRef key, bool value) {
     put(key, b.getBoolAttr(value));
   };
   auto putInt64 = [&](StringRef key, int64_t value) {
     put(key, b.getI64IntegerAttr(value));
   };

   // Identity of the target: always present.
   putString("target_triple", triple);
   putString("cpu", cpu);
   putString("cpu_features", cpuFeatures);

   if (!dataLayout.empty()) {
     putString("data_layout", dataLayout);
   }
   if (vectorWidthInBytes != DEFAULT_VECTOR_WIDTH_IN_BYTES) {
     putInt64("native_vector_size", vectorWidthInBytes);
   }
   if (linkEmbedded != DEFAULT_LINK_EMBEDDED) {
     putBool("link_embedded", linkEmbedded);
   }
   if (debugSymbols != DEFAULT_DEBUG_SYMBOLS) {
     putBool("debug_symbols", debugSymbols);
   }
   if (linkStatic != DEFAULT_LINK_STATIC) {
     putBool("link_static", linkStatic);
   }

   if (sanitizerKind != DEFAULT_SANITIZER_KIND) {
     switch (sanitizerKind) {
     case SanitizerKind::kNone:
       putString("sanitizer", "none");
       break;
     case SanitizerKind::kAddress:
       putString("sanitizer", "address");
       break;
     case SanitizerKind::kThread:
       putString("sanitizer", "thread");
       break;
     }
   }

   if (!staticLibraryOutput.empty()) {
     putString("static_library_output", staticLibraryOutput);
   }

   // LLVM pipeline tuning flags.
   const auto &tuning = pipelineTuningOptions;
   if (tuning.LoopInterleaving != DEFAULT_LOOP_INTERLEAVING) {
     putBool("loop_interleaving", tuning.LoopInterleaving);
   }
   if (tuning.LoopVectorization != DEFAULT_LOOP_VECTORIZATION) {
     putBool("loop_vectorization", tuning.LoopVectorization);
   }
   if (tuning.LoopUnrolling != DEFAULT_LOOP_UNROLLING) {
     putBool("loop_unrolling", tuning.LoopUnrolling);
   }
   if (tuning.SLPVectorization != DEFAULT_SLP_VECTORIZATION) {
     putBool("slp_vectorization", tuning.SLPVectorization);
   }

   // ABI selection.
   if (!llvmTargetOptions.MCOptions.ABIName.empty()) {
     putString("target_abi", llvmTargetOptions.MCOptions.ABIName);
   }
   if (llvmTargetOptions.FloatABIType != DEFAULT_FLOAT_ABI) {
     switch (llvmTargetOptions.FloatABIType) {
     case llvm::FloatABI::Default:
       putString("float_abi", "default");
       break;
     case llvm::FloatABI::Soft:
       putString("float_abi", "soft");
       break;
     case llvm::FloatABI::Hard:
       putString("float_abi", "hard");
       break;
     }
   }

   // Microkernels.
   if (ukernels.compare(DEFAULT_ENABLE_UKERNELS) != 0) {
     putString("ukernels", ukernels);
   }
   if (linkUkernelBitcode != DEFAULT_LINK_UKERNEL_BITCODE) {
     putBool("link_ukernel_bitcode", linkUkernelBitcode);
   }
 }

 // Reconstructs an LLVMTarget from an executable target `config` dictionary.
 //
 // If any of `target_triple`, `cpu` or `cpu_features` is present, a fresh
 // target is created from them (`target_triple` is then mandatory, a missing
 // `cpu` is treated as "generic" and missing `cpu_features` as empty).
 // Otherwise the result starts out as a copy of `defaultTarget`. The remaining
 // keys are applied on top and any defaults that are still unset are filled in
 // from the LLVM target machine.
 //
 // Returns std::nullopt after emitting a diagnostic at `loc` when an attribute
 // has the wrong type or holds an unrecognized value.
 std::optional<LLVMTarget>
 LLVMTarget::loadFromConfigAttr(Location loc, DictionaryAttr config,
                                const LLVMTarget &defaultTarget) {
   // Type mismatches are recorded here so that all of them get reported before
   // the function bails out at the end.
   bool hasFailures = false;

   // Returns the string stored under `name`, or `fallback` when it is absent
   // or not a string. Only reports an error when `required` is set.
   auto getString = [&](StringRef name, StringRef fallback,
                        bool required) -> StringRef {
     Attribute attr = config.get(name);
     if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) {
       return sattr.strref();
     } else {
       if (required) {
         hasFailures = true;
         emitError(loc) << "executable config '" << name
                        << "' required but not present on attribute";
       }
       return fallback;
     }
   };
   // Returns the string stored under `name` if present. A present attribute of
   // another type is reported as an error.
   auto getOptionalString = [&](StringRef name) -> std::optional<StringRef> {
     Attribute attr = config.get(name);
     if (auto sattr = llvm::dyn_cast_if_present<StringAttr>(attr)) {
       return sattr.strref();
     } else if (attr) {
       hasFailures = true;
       emitError(loc) << "executable config '" << name
                      << "' requires string but got " << attr;
     }
     return {};
   };
   // Returns the bool stored under `name`, or `fallback` when absent. A
   // present attribute of another type is reported as an error.
   auto getBool = [&](StringRef name, bool fallback) -> bool {
     Attribute attr = config.get(name);
     if (auto battr = llvm::dyn_cast_if_present<BoolAttr>(attr)) {
       return battr.getValue();
     } else if (attr) {
       hasFailures = true;
       emitError(loc) << "executable config '" << name
                      << "' requires bool but got " << attr;
     }
     return fallback;
   };
   // Returns the integer stored under `name`, or `fallback` when absent. A
   // present attribute of another type is reported as an error.
   auto getInt64 = [&](StringRef name, int64_t fallback) -> int64_t {
     Attribute attr = config.get(name);
     if (auto iattr = llvm::dyn_cast_if_present<IntegerAttr>(attr)) {
       return iattr.getValue().getSExtValue();
     } else if (attr) {
       hasFailures = true;
       emitError(loc) << "executable config '" << name
                      << "' requires i64 but got " << attr;
     }
     return fallback;
   };

   LLVMTarget target;

   // Constructor arguments.
   auto triple = getOptionalString("target_triple");
   auto cpu = getOptionalString("cpu");
   auto cpuFeatures = getOptionalString("cpu_features");
   bool linkEmbedded = getBool("link_embedded", DEFAULT_LINK_EMBEDDED);
   if (triple || cpu || cpuFeatures) {
     if (!triple) {
       emitError(loc) << "executable config 'cpu' or 'cpu_features' must be "
                         "accompanied by 'target_triple'";
       return {};
     }
     std::optional<LLVMTarget> maybeTarget =
         LLVMTarget::create(*triple, cpu ? *cpu : "generic",
                            cpuFeatures ? *cpuFeatures : "", linkEmbedded);
     if (!maybeTarget) {
       return {};
     }
     target.copy(*maybeTarget);
   } else {
     target.copy(defaultTarget);
   }

   target.dataLayout = getString("data_layout", DEFAULT_DATA_LAYOUT, false);
   target.vectorWidthInBytes =
       getInt64("native_vector_size", DEFAULT_VECTOR_WIDTH_IN_BYTES);

   target.debugSymbols = getBool("debug_symbols", DEFAULT_DEBUG_SYMBOLS);
   target.linkStatic = getBool("link_static", DEFAULT_LINK_STATIC);
   auto sanitizer = getOptionalString("sanitizer");
   if (sanitizer) {
     if (sanitizer == "none")
       target.sanitizerKind = SanitizerKind::kNone;
     else if (sanitizer == "address")
       target.sanitizerKind = SanitizerKind::kAddress;
     else if (sanitizer == "thread")
       target.sanitizerKind = SanitizerKind::kThread;
     else {
       emitError(loc) << "executable config unexpected value for 'sanitizer': "
                      << *sanitizer;
       return {};
     }
   }
   target.staticLibraryOutput = getString("static_library_output", "", false);

   // Pipeline tuning flags keep the value already on `target` when absent.
   target.pipelineTuningOptions.LoopInterleaving = getBool(
       "loop_interleaving", target.pipelineTuningOptions.LoopInterleaving);
   target.pipelineTuningOptions.LoopVectorization = getBool(
       "loop_vectorization", target.pipelineTuningOptions.LoopVectorization);
   target.pipelineTuningOptions.LoopUnrolling =
       getBool("loop_unrolling", target.pipelineTuningOptions.LoopUnrolling);
   target.pipelineTuningOptions.SLPVectorization = getBool(
       "slp_vectorization", target.pipelineTuningOptions.SLPVectorization);
   auto targetAbi = getOptionalString("target_abi");
   if (targetAbi)
     target.llvmTargetOptions.MCOptions.ABIName = *targetAbi;
   auto floatAbi = getOptionalString("float_abi");
   if (floatAbi) {
     // Each spelling must map to its own enumerator so that the values written
     // by storeToConfigAttrs ("default", "soft", "hard") round-trip.
     if (floatAbi == "default")
       target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Default;
     else if (floatAbi == "soft")
       target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Soft;
     else if (floatAbi == "hard")
       target.llvmTargetOptions.FloatABIType = llvm::FloatABI::Hard;
     else {
       emitError(loc) << "executable config unexpected value for 'float_abi': "
                      << *floatAbi;
       return {};
     }
   }

   target.ukernels = getString("ukernels", target.ukernels, false);
   target.linkUkernelBitcode =
       getBool("link_ukernel_bitcode", target.linkUkernelBitcode);

   if (hasFailures) {
     return {};
   }
   target.populateDefaultsFromTargetMachine();
   return target;
 }

 void LLVMTarget::populateDefaultsFromTargetMachine() {
   // We may need the target machine for certain default values.
   std::unique_ptr<llvm::TargetMachine> cachedTargetMachine;
   auto getTargetMachine = [&]() {
     if (!cachedTargetMachine) {
       cachedTargetMachine = createTargetMachine(*this);
       // TODO(#13988): proper error propagation. This is a common user scenario.
       assert(cachedTargetMachine && "createTargetMachine failed");
     }
     return cachedTargetMachine.get();
   };

   if (dataLayout.empty()) {
     auto targetDataLayout = getTargetMachine()->createDataLayout();
     dataLayout = targetDataLayout.getStringRepresentation();
   }

   if (vectorWidthInBytes == DEFAULT_VECTOR_WIDTH_IN_BYTES) {
     auto targetMachine = getTargetMachine();
     auto targetFeatures = targetMachine->getTargetFeatureString();

     // The only way to get the real TTI is to create a function using it.
     // LLVM's TargetMachine and related APIs are terrible. Absolutely yuck.
     // Note that we use the data layout set above to either what the user
     // specified or what the target machine returned.
     //
     // If anyone comes across this: it'd be great if getTargetTransformInfo
     // could be called without requiring a function.
     llvm::LLVMContext llvmContext;
     auto llvmModule =
         std::make_unique<llvm::Module>("dummy_module", llvmContext);
     llvmModule->setDataLayout(dataLayout);
     llvm::Function *dummyFunc = llvm::Function::Create(
         llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), false),
         llvm::GlobalValue::ExternalLinkage, "dummy_func", *llvmModule);
     if (targetFeatures.contains("avx512")) {
       // Always override the vector with to 512 on systems with avx512.
       // @dcaballe says:
       // > in ML the frequency throttling that happens when using 512-bit
       // > register doesn't have an overall negative impact in performance due
       // > to the high computational density of the workloads, even on skylake
       // > where the throttling was really bad
       dummyFunc->addFnAttr("prefer-vector-width", "512");
     }
     auto targetTTI = targetMachine->getTargetTransformInfo(*dummyFunc);

     // Query the vector width from TTI.
     unsigned ttiVectorWidthInBytes =
         targetTTI.getRegisterBitWidth(
             llvm::TargetTransformInfo::RGK_FixedWidthVector) /
         8;
     vectorWidthInBytes = ttiVectorWidthInBytes > 1 ? ttiVectorWidthInBytes : 16;
   }
 }

 // Creates an LLVM target machine for `target` using its triple, CPU, CPU
 // features, target options and codegen optimization level, with PIC
 // relocation and no explicit code model.
 //
 // Returns nullptr when no registered LLVM target matches the triple; the
 // lookup error text is not surfaced to the caller.
 std::unique_ptr<llvm::TargetMachine>
 createTargetMachine(const LLVMTarget &target) {
   std::string lookupError;
   auto registeredTarget =
       llvm::TargetRegistry::lookupTarget(target.getTriple(), lookupError);
   if (!registeredTarget) {
     return nullptr;
   }
   return std::unique_ptr<llvm::TargetMachine>(
       registeredTarget->createTargetMachine(
           target.getTriple(), target.getCpu() /* cpu e.g k8 */,
           target.getCpuFeatures() /* cpu features e.g avx512f */,
           target.llvmTargetOptions, llvm::Reloc::Model::PIC_, {},
           target.codeGenOptLevel,
           /*JIT=*/false));
 }

 // Initializes the LLVM backends for the CPU architectures IREE links against
 // (AArch64, ARM, RISCV, X86, WebAssembly). For each of those that appears in
 // llvm/Config/Targets.def this calls the LLVMInitialize<Name>Target,
 // TargetMC, TargetInfo, AsmPrinter and AsmParser entry points; every other
 // target listed below expands to nothing.
 static void initializeLLVMTargets() {
 // Dynamically do preprocessor dispatch to initialize only targets that we
 // care about if they are enabled. Unfortunately, the way the LLVM macros
 // for this are set up and the inability to do a conditional within a macro
 // means that we have to syntactically have a macro for every possible
 // target we care about. There are more robust ways to do this but they all
 // require build support, which is a pain to manage across platforms.
 //
 // See comments below.
 #define LLVM_INITIALIZE_GENERIC(TargetName)                                    \
   LLVMInitialize##TargetName##Target();                                        \
   LLVMInitialize##TargetName##TargetMC();                                      \
   LLVMInitialize##TargetName##TargetInfo();                                    \
   LLVMInitialize##TargetName##AsmPrinter();                                    \
   LLVMInitialize##TargetName##AsmParser();

 // CPU targets that we care about and have hard-linked against are here.
 // They delegate to the generic initialize above. These must all be added
 // to the build file or you will get undefined symbol errors at link time.
 #define LLVM_INITIALIZE_TARGET_AArch64() LLVM_INITIALIZE_GENERIC(AArch64)
 #define LLVM_INITIALIZE_TARGET_ARM() LLVM_INITIALIZE_GENERIC(ARM)
 #define LLVM_INITIALIZE_TARGET_RISCV() LLVM_INITIALIZE_GENERIC(RISCV)
 #define LLVM_INITIALIZE_TARGET_X86() LLVM_INITIALIZE_GENERIC(X86)
 #define LLVM_INITIALIZE_TARGET_WebAssembly()                                   \
   LLVM_INITIALIZE_GENERIC(WebAssembly)

 // We must no-op the name of each target we don't care about. This is annoying,
 // but new targets aren't created every day and it isn't the end of the world.
 // The error messages when one is missing are quite clear and you just add a
 // line here.
 #define LLVM_INITIALIZE_TARGET_AMDGPU()
 #define LLVM_INITIALIZE_TARGET_AVR()
 #define LLVM_INITIALIZE_TARGET_BPF()
 #define LLVM_INITIALIZE_TARGET_Hexagon()
 #define LLVM_INITIALIZE_TARGET_Lanai()
 #define LLVM_INITIALIZE_TARGET_LoongArch()
 #define LLVM_INITIALIZE_TARGET_Mips()
 #define LLVM_INITIALIZE_TARGET_MSP430()
 #define LLVM_INITIALIZE_TARGET_NVPTX()
 #define LLVM_INITIALIZE_TARGET_PowerPC()
 #define LLVM_INITIALIZE_TARGET_Sparc()
 #define LLVM_INITIALIZE_TARGET_SystemZ()
 #define LLVM_INITIALIZE_TARGET_VE()
 #define LLVM_INITIALIZE_TARGET_XCore()

 // Targets.def invokes LLVM_TARGET(Name) once per listed target; route each
 // invocation to the per-target macro defined above.
 #define LLVM_TARGET(TargetName) LLVM_INITIALIZE_TARGET_##TargetName()
 #include "llvm/Config/Targets.def"
 }

 //===----------------------------------------------------------------------===//
 //    __    __   ___________    ____     ____    ____  ______    __    __     //
 //   |  |  |  | |   ____\   \  /   /     \   \  /   / /  __  \  |  |  |  |    //
 //   |  |__|  | |  |__   \   \/   /       \   \/   / |  |  |  | |  |  |  |    //
 //   |   __   | |   __|   \_    _/         \_    _/  |  |  |  | |  |  |  |    //
 //   |  |  |  | |  |____    |  |   __        |  |    |  `--'  | |  `--'  |    //
 //   |__|  |__| |_______|   |__|  (_ )       |__|     \______/   \______/     //
 //                                 |/                                         //
 //===----------------------------------------------------------------------===//
 //
 // Beware adding command-line flags here: IREE is a cross-compiler and can
 // compile for multiple targets in a single invocation. Global flags added here
 // apply to all targets with no way to override them from hosting applications
 // that may need to programmatically set them per target and that's bad.
 //
 // Flags *must* be added to the LLVMTarget if they are target-specific and to
 // LLVMTargetOptions if they apply to the whole backend.

 // Registers every `--iree-llvmcpu-*` command-line flag with `binder`, each
 // bound to the matching member of this options struct and grouped under the
 // "LLVMCPU HAL Target" category. LLVM targets are initialized first so that
 // the list-targets flag can print them.
 void LLVMCPUTargetCLOptions::bindOptions(OptionsBinder &binder) {
   // Initialize LLVM targets prior to the iree-llvmcpu-list-targets CLI opt.
   initializeLLVMTargets();

   static llvm::cl::OptionCategory llvmcpuCategory("LLVMCPU HAL Target");

   // ---- General flags ----
   // Prints the registered targets and exits the process immediately.
   binder.opt<bool>(
       "iree-llvmcpu-list-targets", listTargets, llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Lists all registered targets that the LLVM backend can "
                      "generate code for."),
       llvm::cl::ValueDisallowed, llvm::cl::callback([&](const bool &) {
         llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs());
         exit(0);
       }));

   // ---- Target invariant flags: linker tools and artifacts ----
   binder.opt<std::string>(
       "iree-llvmcpu-system-linker-path", systemLinkerPath,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Tool used to link system shared libraries produced by "
                      "IREE (for --iree-llvmcpu-link-embedded=false)."));
   binder.opt<std::string>(
       "iree-llvmcpu-embedded-linker-path", embeddedLinkerPath,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Tool used to link embedded ELFs produced by IREE (for "
                      "--iree-llvmcpu-link-embedded=true)."));
   binder.opt<std::string>(
       "iree-llvmcpu-wasm-linker-path", wasmLinkerPath,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Tool used to link WebAssembly modules produced by "
                      "IREE (for --iree-llvmcpu-target-triple=wasm32-*)."));
   binder.opt<bool>(
       "iree-llvmcpu-keep-linker-artifacts", keepLinkerArtifacts,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Keep LLVM linker target artifacts (.so/.dll/etc)"));

   // ---- Default device options: target identity ----
   binder.opt<std::string>("iree-llvmcpu-target-triple", targetTriple,
                           llvm::cl::cat(llvmcpuCategory),
                           llvm::cl::desc("LLVM target machine triple."));
   binder.opt<std::string>(
       "iree-llvmcpu-target-cpu", targetCPU, llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc(
           "LLVM target machine CPU; use 'host' for your host native CPU."));
   binder.opt<std::string>(
       "iree-llvmcpu-target-cpu-features", targetCPUFeatures,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("LLVM target machine CPU features; use 'host' for your "
                      "host native CPU."));

   // ---- Default device options: linking and output ----
   binder.opt<bool>(
       "iree-llvmcpu-link-embedded", linkEmbedded,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Links binaries into a platform-agnostic ELF to be "
                      "loaded by the embedded IREE ELF loader."));
   binder.opt<bool>(
       "iree-llvmcpu-link-static", linkStatic, llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc(
           "Links system libraries into binaries statically to isolate them "
           "from platform dependencies needed at runtime"));
   binder.opt<std::string>(
       "iree-llvmcpu-static-library-output-path", staticLibraryOutputPath,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc(
           "Path to output static object (EX: '/path/to/static-library.o'). "
           "This will produce the static library at the specified path along "
           "with a similarly named '.h' file for static linking."));
   binder.opt<bool>(
       "iree-llvmcpu-debug-symbols", debugSymbols,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Generate and embed debug information (DWARF, PDB, etc)"));

   // ---- Default device options: LLVM pipeline tuning ----
   binder.opt<bool>("iree-llvmcpu-loop-interleaving", llvmLoopInterleaving,
                    llvm::cl::cat(llvmcpuCategory),
                    llvm::cl::desc("Enable LLVM loop interleaving opt"));
   binder.opt<bool>("iree-llvmcpu-loop-vectorization", llvmLoopVectorization,
                    llvm::cl::cat(llvmcpuCategory),
                    llvm::cl::desc("Enable LLVM loop vectorization opt"));
   binder.opt<bool>("iree-llvmcpu-loop-unrolling", llvmLoopUnrolling,
                    llvm::cl::cat(llvmcpuCategory),
                    llvm::cl::desc("Enable LLVM loop unrolling opt"));
   binder.opt<bool>("iree-llvmcpu-slp-vectorization", llvmSLPVectorization,
                    llvm::cl::cat(llvmcpuCategory),
                    llvm::cl::desc("Enable LLVM SLP Vectorization opt"));

   // ---- Default device options: sanitizers and ABI ----
   binder.opt<SanitizerKind>(
       "iree-llvmcpu-sanitize", sanitizerKind, llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Apply LLVM sanitize feature"),
       llvm::cl::values(clEnumValN(SanitizerKind::kAddress, "address",
                                   "Address sanitizer support"),
                        clEnumValN(SanitizerKind::kThread, "thread",
                                   "Thread sanitizer support")));
   binder.opt<std::string>(
       "iree-llvmcpu-target-abi", targetABI, llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("LLVM target machine ABI; specify for -mabi"));
   binder.opt<llvm::FloatABI::ABIType>(
       "iree-llvmcpu-target-float-abi", targetFloatABI,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("LLVM target codegen enables soft float abi e.g "
                      "-mfloat-abi=softfp"),
       llvm::cl::values(
           clEnumValN(llvm::FloatABI::Default, "default", "Default (softfp)"),
           clEnumValN(llvm::FloatABI::Soft, "soft",
                      "Software floating-point emulation"),
           clEnumValN(llvm::FloatABI::Hard, "hard",
                      "Hardware floating-point instructions")));

   // ---- Default device options: layout and vector width overrides ----
   binder.opt<std::string>(
       "iree-llvmcpu-target-data-layout", targetDataLayout,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("LLVM target machine data layout override."));
   binder.opt<unsigned>("iree-llvmcpu-target-vector-width-in-bytes",
                        targetVectorWidthInBytes,
                        llvm::cl::cat(llvmcpuCategory),
                        llvm::cl::desc("Overrides the native vector register "
                                       "width (in bytes) of the target."));

   // ---- Default device options: microkernels ----
   binder.opt<std::string>(
       "iree-llvmcpu-enable-ukernels", enableUkernels,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc("Enables ukernels in the llvmcpu backend. May be "
                      "`default`, `none`, `all`, or a comma-separated list of "
                      "specific unprefixed ukernels to enable, e.g. `mmt4d`."));
   binder.opt<bool>(
       "iree-llvmcpu-link-ukernel-bitcode", linkUKernelBitcode,
       llvm::cl::cat(llvmcpuCategory),
       llvm::cl::desc(
           "Link ukernel bitcode libraries into generated executables"));
 }

 // Builds the LLVMTargetOptions described by the currently bound command-line
 // values.
 //
 // The linker paths and artifact flag are copied as-is. The default target is
 // created from the triple/CPU/feature flags (an empty triple is replaced by
 // the process triple, which is also stored back into `targetTriple`), then
 // every per-target flag is applied on top and the remaining defaults are
 // filled in from the LLVM target machine.
 LLVMTargetOptions LLVMCPUTargetCLOptions::getTargetOptions() {
   LLVMTargetOptions targetOptions;
   targetOptions.systemLinkerPath = systemLinkerPath;
   targetOptions.embeddedLinkerPath = embeddedLinkerPath;
   targetOptions.wasmLinkerPath = wasmLinkerPath;
   targetOptions.keepLinkerArtifacts = keepLinkerArtifacts;

   // No explicit triple means "compile for the machine we are running on".
   if (targetTriple.empty()) {
     targetTriple = llvm::sys::getProcessTriple();
   }

   std::optional<LLVMTarget> maybeTarget = LLVMTarget::create(
       targetTriple, targetCPU, targetCPUFeatures, linkEmbedded);
   if (maybeTarget) {
     targetOptions.target = *maybeTarget;
   } else {
     // Best effort: report the problem and continue with the
     // default-constructed target already held by `targetOptions`.
     llvm::errs() << "Inconsistency in iree-llvmcpu-target-cpu-* command-line "
                     "flags. The target CPU is not properly defined.\n";
   }

   // Apply the per-target flags on top of the created target.
   LLVMTarget &target = targetOptions.target;
   target.linkStatic = linkStatic;
   target.staticLibraryOutput = staticLibraryOutputPath;
   target.debugSymbols = debugSymbols;
   target.pipelineTuningOptions.LoopInterleaving = llvmLoopInterleaving;
   target.pipelineTuningOptions.LoopVectorization = llvmLoopVectorization;
   target.pipelineTuningOptions.LoopUnrolling = llvmLoopUnrolling;
   target.pipelineTuningOptions.SLPVectorization = llvmSLPVectorization;
   target.sanitizerKind = sanitizerKind;
   target.llvmTargetOptions.MCOptions.ABIName = targetABI;
   target.llvmTargetOptions.FloatABIType = targetFloatABI;
   target.dataLayout = targetDataLayout;
   target.vectorWidthInBytes = targetVectorWidthInBytes;
   target.ukernels = enableUkernels;
   target.linkUkernelBitcode = linkUKernelBitcode;

   // Must run last: it only fills values the flags above left at defaults.
   target.populateDefaultsFromTargetMachine();
   return targetOptions;
 }

 } // namespace mlir::iree_compiler::IREE::HAL