Forking off device methods from TargetBackend->TargetDevice. (#16591) This is the start of changes allowing us to reason about devices independently from the backend used to generate code for them. For now the TargetDevice is fairly coupled to the TargetBackend it was forked from but in the future the intent is to split them further such that the TargetDevice is shared across multiple backends (to start the CPU -> VMVX/LLVM-CPU backends). When asked to provide a set of default or host executable targets each plugin implementing a TargetBackend now receives the device it's generating for. This allows for example a Metal device specifying runtime device properties (Metal version, etc) to have the Metal SPIRV target backend produce executables compatible with it. TargetDevice being separate allows us to provide those independently in plugins or in the main IREE compiler (like where the CPUTargetDevice will live). This will make composition possible such as multiple plugins contributing translation pipelines for a single device to allow for specialization. Future PRs will start to deprecate the `--iree-hal-target-backends=` flag in favor of specifying devices instead and then allowing each device to have its own default backends specified if the user wants global flags (`--iree-hal-cpu-target-backends=llvm-cpu,vmvx`, etc). For now it's going to remain and there's a link back from backends to devices for automatic selection.

commit: 890b070bd94a62dc90203ed0514b212836af46ac [log] [tgz]
author: Ben Vanik <ben.vanik@gmail.com> Thu Feb 29 10:28:37 2024 -0800
committer: GitHub <noreply@github.com> Thu Feb 29 10:28:37 2024 -0800
tree: 88f433fdc59301c7b6f77555c1e9b1f073ed206d
parent: b2250d8ff4c81ab51095807944fe5c6809618ae0 [diff]
diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp
index 6381319..0316cec 100644
--- a/compiler/plugins/target/CUDA/CUDATarget.cpp
+++ b/compiler/plugins/target/CUDA/CUDATarget.cpp

@@ -362,11 +362,60 @@
   mpm.run(module, mam);
 }
 
+class CUDATargetDevice final : public TargetDevice {
+public:
+  CUDATargetDevice(const CUDAOptions &options) : options(options) {}
+
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+
+    // TODO: device configuration attrs.
+    auto configAttr = b.getDictionaryAttr(configItems);
+
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("cuda")->getDefaultExecutableTargets(
+        context, "cuda", configAttr, executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("cuda"),
+                                            configAttr, executableTargetAttrs);
+  }
+
+private:
+  const CUDAOptions &options;
+};
+
 class CUDATargetBackend final : public TargetBackend {
 public:
   CUDATargetBackend(const CUDAOptions &options) : options(options) {}
 
-  std::string name() const override { return "cuda"; }
+  std::string getLegacyDefaultDeviceID() const override { return "cuda"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(getExecutableTarget(context));
+  }
+
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context) const {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    addConfig("target_arch", b.getStringAttr(options.clTargetChip));
+
+    return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+        b.getStringAttr("cuda"), b.getStringAttr("cuda-nvptx-fb"),
+        b.getDictionaryAttr(configItems));
+  }
 
   void getDependentDialects(DialectRegistry &registry) const override {
     // TODO: Derive the use of TransformDialect from inner
@@ -379,24 +428,6 @@
     mlir::registerNVVMDialectTranslation(registry);
   }
 
-  IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    // TODO: device configuration attrs.
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-
-    // If we had multiple target environments we would generate one target attr
-    // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(getExecutableTarget(context));
-
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
-  }
-
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
                                       OpPassManager &passManager) override {
     // For now we disable configuration if the variant has external object
@@ -645,23 +676,6 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-    // Add some configurations to the `hal.executable.target` attribute.
-    auto addConfig = [&](StringRef name, Attribute value) {
-      configItems.emplace_back(StringAttr::get(context, name), value);
-    };
-    // Set target arch
-    addConfig("target_arch", StringAttr::get(context, options.clTargetChip));
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, b.getStringAttr("cuda"), b.getStringAttr("cuda-nvptx-fb"),
-        configAttr);
-  }
-
   const CUDAOptions &options;
 };
 
@@ -669,8 +683,12 @@
 struct CUDASession
     : public PluginSession<CUDASession, CUDAOptions,
                            PluginActivationPolicy::DefaultActivated> {
-  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
     // #hal.device.target<"cuda", ...
+    targets.add("cuda",
+                [&]() { return std::make_shared<CUDATargetDevice>(options); });
+  }
+  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
     // #hal.executable.target<"cuda", ...
     targets.add("cuda", [&]() {
       LLVMInitializeNVPTXTarget();

diff --git a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp
index 9ea8853..7f1e822 100644
--- a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp
+++ b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp

@@ -105,21 +105,14 @@
       spirv::DeviceType::IntegratedGPU, spirv::TargetEnvAttr::kUnknownDeviceID);
 }
 
-class MetalSPIRVTargetBackend : public TargetBackend {
+// TODO: MetalOptions for choosing the Metal version.
+class MetalTargetDevice : public TargetDevice {
 public:
-  MetalSPIRVTargetBackend(const MetalSPIRVOptions &options)
-      : options(options) {}
-
-  // NOTE: we could vary this based on the options such as 'metal-v2'.
-  std::string name() const override { return "metal"; }
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<gpu::GPUDialect, IREE::Codegen::IREECodegenDialect,
-                    IREE::Flow::FlowDialect, spirv::SPIRVDialect>();
-  }
+  MetalTargetDevice(const MetalSPIRVOptions &options) : options(options) {}
 
   IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -127,12 +120,53 @@
 
     // If we had multiple target environments we would generate one target attr
     // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(
-        getExecutableTarget(context, getMetalTargetEnv(context)));
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("metal-spirv")
+        ->getDefaultExecutableTargets(context, "metal", configAttr,
+                                      executableTargetAttrs);
 
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("metal"),
+                                            configAttr, executableTargetAttrs);
+  }
+
+private:
+  const MetalSPIRVOptions &options;
+};
+
+class MetalSPIRVTargetBackend : public TargetBackend {
+public:
+  MetalSPIRVTargetBackend(const MetalSPIRVOptions &options)
+      : options(options) {}
+
+  std::string getLegacyDefaultDeviceID() const override { return "metal"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(
+        getExecutableTarget(context, getMetalTargetEnv(context)));
+  }
+
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context,
+                      spirv::TargetEnvAttr targetEnv) const {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    addConfig(spirv::getTargetEnvAttrName(), targetEnv);
+
+    return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+        b.getStringAttr("metal-spirv"), b.getStringAttr("metal-msl-fb"),
+        b.getDictionaryAttr(configItems));
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<gpu::GPUDialect, IREE::Codegen::IREECodegenDialect,
+                    IREE::Flow::FlowDialect, spirv::SPIRVDialect>();
   }
 
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
@@ -286,46 +320,33 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context,
-                      spirv::TargetEnvAttr targetEnv) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    configItems.emplace_back(b.getStringAttr(spirv::getTargetEnvAttrName()),
-                             targetEnv);
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, b.getStringAttr("metal"), b.getStringAttr("metal-msl-fb"),
-        configAttr);
-  }
-
   const MetalSPIRVOptions &options;
 };
 
 struct MetalSPIRVSession
     : public PluginSession<MetalSPIRVSession, MetalSPIRVOptions,
                            PluginActivationPolicy::DefaultActivated> {
-  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
-    auto backendFactory = [=]() {
-      return std::make_shared<MetalSPIRVTargetBackend>(options);
-    };
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
     // #hal.device.target<"metal", ...
-    targets.add("metal", backendFactory);
+    targets.add("metal",
+                [=]() { return std::make_shared<MetalTargetDevice>(options); });
+  }
+  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
     // #hal.executable.target<"metal-spirv", ...
-    targets.add("metal-spirv", backendFactory);
+    targets.add("metal-spirv", [=]() {
+      return std::make_shared<MetalSPIRVTargetBackend>(options);
+    });
   }
 };
 
 } // namespace mlir::iree_compiler::IREE::HAL
 
+IREE_DEFINE_COMPILER_OPTION_FLAGS(
+    mlir::iree_compiler::IREE::HAL::MetalSPIRVOptions);
+
 extern "C" bool iree_register_compiler_plugin_hal_target_metal_spirv(
     mlir::iree_compiler::PluginRegistrar *registrar) {
   registrar->registerPlugin<mlir::iree_compiler::IREE::HAL::MetalSPIRVSession>(
       "hal_target_metal_spirv");
   return true;
 }
-
-IREE_DEFINE_COMPILER_OPTION_FLAGS(
-    mlir::iree_compiler::IREE::HAL::MetalSPIRVOptions);

diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index 25d1eee..a1e6818 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp

@@ -132,11 +132,72 @@
   }
 }
 
+class ROCMTargetDevice final : public TargetDevice {
+public:
+  ROCMTargetDevice(const ROCMOptions &options) : options(options) {}
+
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    // Indicates that the runtime HAL driver operates only in the legacy
+    // synchronous mode.
+    addConfig("legacy_sync", b.getUnitAttr());
+
+    auto configAttr = b.getDictionaryAttr(configItems);
+
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("rocm")->getDefaultExecutableTargets(
+        context, "rocm", configAttr, executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("rocm"),
+                                            configAttr, executableTargetAttrs);
+  }
+
+private:
+  const ROCMOptions &options;
+};
+
 class ROCMTargetBackend final : public TargetBackend {
 public:
   ROCMTargetBackend(const ROCMOptions &options) : options(options) {}
 
-  std::string name() const override { return "rocm"; }
+  std::string getLegacyDefaultDeviceID() const override { return "rocm"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(getExecutableTarget(context));
+  }
+
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context) const {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    addConfig("target_arch", b.getStringAttr(options.targetChip));
+    addConfig("ukernels", b.getStringAttr(options.enableROCMUkernels));
+
+    ArrayAttr mmaAttrs = getROCMSupportedMmaAttrs(context, options.targetChip);
+    if (mmaAttrs) {
+      addConfig("mma_intrinsics", mmaAttrs);
+    }
+
+    return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+        b.getStringAttr("rocm"), b.getStringAttr("rocm-hsaco-fb"),
+        b.getDictionaryAttr(configItems));
+  }
 
   void getDependentDialects(DialectRegistry &registry) const override {
     mlir::registerBuiltinDialectTranslation(registry);
@@ -148,25 +209,27 @@
     registry.insert<amdgpu::AMDGPUDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
+  void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
+                                      OpPassManager &passManager) override {
+    // For now we disable configuration if the variant has external object
+    // files.
+    if (variantOp.isExternal())
+      return;
 
-    // Indicates that the runtime HAL driver operates only in the legacy
-    // synchronous mode.
-    configItems.emplace_back(b.getStringAttr("legacy_sync"), b.getUnitAttr());
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-
-    // If we had multiple target environments we would generate one target attr
-    // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(getExecutableTarget(context));
-
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
+    buildLLVMGPUCodegenConfigurationPassPipeline(passManager);
   }
+
+  void buildTranslationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
+                                    OpPassManager &passManager) override {
+    // For now we disable translation if the variant has external object files.
+    // We could instead perform linking with those objects (if they're bitcode
+    // ala libdevice.bc, etc).
+    if (variantOp.isExternal())
+      return;
+
+    buildLLVMGPUCodegenPassPipeline(passManager, true);
+  }
+
   // Performs optimizations on |module| (including LTO-style whole-program
   // ones). Inspired by code section in
   // https://github.com/openxla/iree/blob/main/compiler/plugins/target/CUDA/CUDATarget.cpp
@@ -204,27 +267,6 @@
     mpm.run(module, mam);
   }
 
-  void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
-                                      OpPassManager &passManager) override {
-    // For now we disable configuration if the variant has external object
-    // files.
-    if (variantOp.isExternal())
-      return;
-
-    buildLLVMGPUCodegenConfigurationPassPipeline(passManager);
-  }
-
-  void buildTranslationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
-                                    OpPassManager &passManager) override {
-    // For now we disable translation if the variant has external object files.
-    // We could instead perform linking with those objects (if they're bitcode
-    // ala libdevice.bc, etc).
-    if (variantOp.isExternal())
-      return;
-
-    buildLLVMGPUCodegenPassPipeline(passManager, true);
-  }
-
   LogicalResult serializeExecutable(const SerializationOptions &serOptions,
                                     IREE::HAL::ExecutableVariantOp variantOp,
                                     OpBuilder &executableBuilder) override {
@@ -460,30 +502,6 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-    // Add some configurations to the `hal.executable.target` attribute.
-    auto addConfig = [&](StringRef name, Attribute value) {
-      configItems.emplace_back(StringAttr::get(context, name), value);
-    };
-    // Set target arch
-    addConfig("target_arch", StringAttr::get(context, options.targetChip));
-
-    addConfig("ukernels", StringAttr::get(context, options.enableROCMUkernels));
-
-    ArrayAttr mmaAttrs = getROCMSupportedMmaAttrs(context, options.targetChip);
-    if (mmaAttrs) {
-      addConfig("mma_intrinsics", mmaAttrs);
-    }
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, b.getStringAttr("rocm"), b.getStringAttr("rocm-hsaco-fb"),
-        configAttr);
-  }
-
   const ROCMOptions &options;
 };
 
@@ -491,12 +509,20 @@
 struct ROCMSession
     : public PluginSession<ROCMSession, ROCMOptions,
                            PluginActivationPolicy::DefaultActivated> {
-  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
     if (options.bitcodeDirectory.empty()) {
       options.bitcodeDirectory = findPlatformLibDirectory("rocm");
     }
 
     // #hal.device.target<"rocm", ...
+    targets.add("rocm",
+                [&]() { return std::make_shared<ROCMTargetDevice>(options); });
+  }
+  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
+    if (options.bitcodeDirectory.empty()) {
+      options.bitcodeDirectory = findPlatformLibDirectory("rocm");
+    }
+
     // #hal.executable.target<"rocm", ...
     targets.add("rocm", [&]() {
       LLVMInitializeAMDGPUTarget();

diff --git a/compiler/plugins/target/ROCM/test/target_device_features.mlir b/compiler/plugins/target/ROCM/test/target_device_features.mlir
index 9f8bf60..35379d7 100644
--- a/compiler/plugins/target/ROCM/test/target_device_features.mlir
+++ b/compiler/plugins/target/ROCM/test/target_device_features.mlir

@@ -1,5 +1,5 @@
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=rocm},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx940 %s | FileCheck %s --check-prefix=MI300
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=rocm},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx942 %s | FileCheck %s --check-prefix=MI300
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=rocm},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx940 %s | FileCheck %s --check-prefix=MI300
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=rocm},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-rocm-target-chip=gfx942 %s | FileCheck %s --check-prefix=MI300
 
 // MI300: mma_intrinsics = [#iree_gpu.mfma_layout<F16_16x16x16_F32>, #iree_gpu.mfma_layout<F16_32x32x8_F32>]
 

diff --git a/compiler/plugins/target/VMVX/VMVXTarget.cpp b/compiler/plugins/target/VMVX/VMVXTarget.cpp
index abf72c3..f7e3a8d 100644
--- a/compiler/plugins/target/VMVX/VMVXTarget.cpp
+++ b/compiler/plugins/target/VMVX/VMVXTarget.cpp

@@ -41,29 +41,26 @@
 static IREE::HAL::ExecutableTargetAttr
 getVMVXExecutableTarget(bool enableMicrokernels, MLIRContext *context,
                         StringRef backend, StringRef format) {
-  SmallVector<NamedAttribute> config;
-  config.emplace_back(
-      StringAttr::get(context, "ukernels"),
-      StringAttr::get(context, enableMicrokernels ? "all" : "none"));
-  return IREE::HAL::ExecutableTargetAttr::get(
-      context, StringAttr::get(context, backend),
-      StringAttr::get(context, format), DictionaryAttr::get(context, config));
+  Builder b(context);
+  SmallVector<NamedAttribute> configItems;
+
+  configItems.emplace_back(
+      b.getStringAttr("ukernels"),
+      b.getStringAttr(enableMicrokernels ? "all" : "none"));
+
+  return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+      b.getStringAttr(backend), b.getStringAttr(format),
+      b.getDictionaryAttr(configItems));
 }
 
-class VMVXTargetBackend final : public TargetBackend {
+// TODO(benvanik): move to a CPU device registration outside of VMVX.
+class VMVXTargetDevice final : public TargetDevice {
 public:
-  VMVXTargetBackend(const VMVXOptions &options) : options(options) {}
-
-  std::string name() const override { return "vmvx"; }
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::VM::VMDialect,
-                    IREE::VMVX::VMVXDialect,
-                    IREE::LinalgExt::IREELinalgExtDialect>();
-  }
+  VMVXTargetDevice() = default;
 
   IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -71,17 +68,49 @@
 
     // If we had multiple target environments we would generate one target attr
     // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(getVMVXExecutableTarget(
-        options.enableMicrokernels, context, "vmvx", "vmvx-bytecode-fb"));
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("vmvx")->getDefaultExecutableTargets(
+        context, "vmvx", configAttr, executableTargetAttrs);
 
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("vmvx"),
+                                            configAttr, executableTargetAttrs);
   }
 
   std::optional<IREE::HAL::DeviceTargetAttr>
-  getHostDeviceTarget(MLIRContext *context) const override {
-    return getDefaultDeviceTarget(context);
+  getHostDeviceTarget(MLIRContext *context,
+                      const TargetRegistry &targetRegistry) const override {
+    return getDefaultDeviceTarget(context, targetRegistry);
+  }
+};
+
+class VMVXTargetBackend final : public TargetBackend {
+public:
+  VMVXTargetBackend(const VMVXOptions &options) : options(options) {}
+
+  std::string getLegacyDefaultDeviceID() const override { return "vmvx"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(getVMVXExecutableTarget(
+        options.enableMicrokernels, context, "vmvx", "vmvx-bytecode-fb"));
+  }
+
+  void getHostExecutableTargets(MLIRContext *context, StringRef deviceID,
+                                DictionaryAttr deviceConfigAttr,
+                                SmallVectorImpl<IREE::HAL::ExecutableTargetAttr>
+                                    &executableTargetAttrs) const override {
+    executableTargetAttrs.push_back(getVMVXExecutableTarget(
+        options.enableMicrokernels, context, "vmvx", "vmvx-bytecode-fb"));
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::VM::VMDialect,
+                    IREE::VMVX::VMVXDialect,
+                    IREE::LinalgExt::IREELinalgExtDialect>();
   }
 
   IREE::VM::TargetOptions
@@ -171,19 +200,13 @@
   const VMVXOptions &options;
 };
 
-class VMVXInlineTargetBackend final : public TargetBackend {
+class VMVXInlineTargetDevice final : public TargetDevice {
 public:
-  VMVXInlineTargetBackend(const VMVXOptions &options) : options(options) {}
-
-  std::string name() const override { return "vmvx-inline"; }
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry
-        .insert<IREE::Codegen::IREECodegenDialect, IREE::VMVX::VMVXDialect>();
-  }
+  VMVXInlineTargetDevice() = default;
 
   IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -191,12 +214,36 @@
 
     // If we had multiple target environments we would generate one target attr
     // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(getVMVXExecutableTarget(
-        options.enableMicrokernels, context, "vmvx-inline", "vmvx-ir"));
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("vmvx-inline")
+        ->getDefaultExecutableTargets(context, "vmvx-inline", configAttr,
+                                      executableTargetAttrs);
 
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
+    return IREE::HAL::DeviceTargetAttr::get(context,
+                                            b.getStringAttr("vmvx-inline"),
+                                            configAttr, executableTargetAttrs);
+  }
+};
+
+class VMVXInlineTargetBackend final : public TargetBackend {
+public:
+  VMVXInlineTargetBackend(const VMVXOptions &options) : options(options) {}
+
+  std::string getLegacyDefaultDeviceID() const override {
+    return "vmvx-inline";
+  }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(getVMVXExecutableTarget(
+        options.enableMicrokernels, context, "vmvx-inline", "vmvx-ir"));
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry
+        .insert<IREE::Codegen::IREECodegenDialect, IREE::VMVX::VMVXDialect>();
   }
 
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
@@ -217,12 +264,19 @@
 struct VMVXSession
     : public PluginSession<VMVXSession, VMVXOptions,
                            PluginActivationPolicy::DefaultActivated> {
-  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
+    // TODO(benvanik): move to a CPU device registration outside of VMVX. Note
+    // that the inline device does need to be special.
     // #hal.device.target<"vmvx", ...
+    targets.add("vmvx", [&]() { return std::make_shared<VMVXTargetDevice>(); });
+    // #hal.device.target<"vmvx-inline", ...
+    targets.add("vmvx-inline",
+                [&]() { return std::make_shared<VMVXInlineTargetDevice>(); });
+  }
+  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
     // #hal.executable.target<"vmvx", ...
     targets.add("vmvx",
                 [&]() { return std::make_shared<VMVXTargetBackend>(options); });
-    // #hal.device.target<"vmvx-inline", ...
     // #hal.executable.target<"vmvx-inline", ...
     targets.add("vmvx-inline", [&]() {
       return std::make_shared<VMVXInlineTargetBackend>(options);

diff --git a/compiler/plugins/target/WebGPUSPIRV/WebGPUSPIRVTarget.cpp b/compiler/plugins/target/WebGPUSPIRV/WebGPUSPIRVTarget.cpp
index dd39ebd..3fd3b22 100644
--- a/compiler/plugins/target/WebGPUSPIRV/WebGPUSPIRVTarget.cpp
+++ b/compiler/plugins/target/WebGPUSPIRV/WebGPUSPIRVTarget.cpp

@@ -54,13 +54,64 @@
       spirv::DeviceType::Unknown, spirv::TargetEnvAttr::kUnknownDeviceID);
 }
 
+// TODO: WebGPUOptions for choosing the version/extensions/etc.
+class WebGPUTargetDevice : public TargetDevice {
+public:
+  WebGPUTargetDevice(const WebGPUSPIRVOptions &options) : options(options) {}
+
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+
+    auto configAttr = b.getDictionaryAttr(configItems);
+
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("webgpu-spirv")
+        ->getDefaultExecutableTargets(context, "webgpu", configAttr,
+                                      executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("webgpu"),
+                                            configAttr, executableTargetAttrs);
+  }
+
+private:
+  const WebGPUSPIRVOptions &options;
+};
+
 class WebGPUSPIRVTargetBackend : public TargetBackend {
 public:
   WebGPUSPIRVTargetBackend(const WebGPUSPIRVOptions &options)
       : options(options) {}
 
-  // NOTE: we could vary this based on the options such as 'webgpu-v2'.
-  std::string name() const override { return "webgpu"; }
+  std::string getLegacyDefaultDeviceID() const override { return "webgpu"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(
+        getExecutableTarget(context, getWebGPUTargetEnv(context)));
+  }
+
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context,
+                      spirv::TargetEnvAttr targetEnv) const {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    addConfig(spirv::getTargetEnvAttrName(), targetEnv);
+
+    return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+        b.getStringAttr("webgpu-spirv"), b.getStringAttr("webgpu-wgsl-fb"),
+        b.getDictionaryAttr(configItems));
+  }
 
   // TODO(scotttodd): Prune FlowDialect dep when WGSLReplacePushConstantsPass
   //     does not use the Flow dialect (TranslateExecutables calls this
@@ -71,23 +122,6 @@
                     spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-
-    // If we had multiple target environments we would generate one target attr
-    // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(
-        getExecutableTarget(context, getWebGPUTargetEnv(context)));
-
-    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("webgpu"),
-                                            configAttr, targetAttrs);
-  }
-
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
                                       OpPassManager &passManager) override {
     // For now we disable configuration if the variant has external object
@@ -259,35 +293,23 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context,
-                      spirv::TargetEnvAttr targetEnv) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    configItems.emplace_back(b.getStringAttr(spirv::getTargetEnvAttrName()),
-                             targetEnv);
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, b.getStringAttr("webgpu-spirv"),
-        b.getStringAttr("webgpu-wgsl-fb"), configAttr);
-  }
-
   const WebGPUSPIRVOptions &options;
 };
 
 struct WebGPUSPIRVSession
     : public PluginSession<WebGPUSPIRVSession, WebGPUSPIRVOptions,
                            PluginActivationPolicy::DefaultActivated> {
-  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
-    auto backendFactory = [=]() {
-      return std::make_shared<WebGPUSPIRVTargetBackend>(options);
-    };
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
     // #hal.device.target<"webgpu", ...
-    targets.add("webgpu", backendFactory);
+    targets.add("webgpu", [=]() {
+      return std::make_shared<WebGPUTargetDevice>(options);
+    });
+  }
+  void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
     // #hal.executable.target<"webgpu-spirv", ...
-    targets.add("webgpu-spirv", backendFactory);
+    targets.add("webgpu-spirv", [=]() {
+      return std::make_shared<WebGPUSPIRVTargetBackend>(options);
+    });
   }
 };
 

diff --git a/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp b/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp
index 5c39fc3..28e21dd 100644
--- a/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp
+++ b/compiler/src/iree/compiler/API/Internal/CompilerDriver.cpp

@@ -345,10 +345,15 @@
         pluginActivationStatus = pluginSession.activatePlugins(&context);
 
         // Initialize target registry, bootstrapping with the static globals.
-        targetRegistry.mergeFrom(IREE::HAL::TargetBackendRegistry::getGlobal());
-        IREE::HAL::TargetBackendList pluginTargetList;
-        pluginSession.populateHALTargetBackends(pluginTargetList);
-        targetRegistry.mergeFrom(pluginTargetList);
+        // TODO(15468): remove the static registration mechanism so the merge
+        // from global is not required.
+        targetRegistry.mergeFrom(IREE::HAL::TargetRegistry::getGlobal());
+        IREE::HAL::TargetBackendList pluginTargetBackendList;
+        pluginSession.populateHALTargetBackends(pluginTargetBackendList);
+        targetRegistry.mergeFrom(pluginTargetBackendList);
+        IREE::HAL::TargetDeviceList pluginTargetDeviceList;
+        pluginSession.populateHALTargetDevices(pluginTargetDeviceList);
+        targetRegistry.mergeFrom(pluginTargetDeviceList);
       }
     }
     return pluginActivationStatus;
@@ -368,8 +373,8 @@
   PluginManagerOptions pluginManagerOptions;
   PluginManagerSession pluginSession;
 
-  // We initialize the TargetBackendRegistry lazily with the plugins.
-  IREE::HAL::TargetBackendRegistry targetRegistry;
+  // We initialize the TargetRegistry lazily with the plugins.
+  IREE::HAL::TargetRegistry targetRegistry;
 
   // We lazily activate plugins on the first invocation. This allows plugin
   // activation to be configured at the session level via the API, if
@@ -715,7 +720,7 @@
   auto &targetRegistry = session.targetRegistry;
   pipelineHooks.buildConstEvalPassPipelineCallback =
       [&targetRegistry](OpPassManager &pm) {
-        pm.addPass(ConstEval::createJitGlobalsPass(targetRegistry));
+        pm.addPass(ConstEval::createJitGlobalsPass({&targetRegistry}));
       };
   // The PluginSession implements PipelineExtensions and delegates it to
   // activated plugins.

diff --git a/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp b/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp
index 9163335..0ea6a3b 100644
--- a/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp
+++ b/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp

@@ -31,7 +31,8 @@
 using namespace mlir;
 
 using mlir::iree_compiler::IREE::HAL::TargetBackendList;
-using mlir::iree_compiler::IREE::HAL::TargetBackendRegistry;
+using mlir::iree_compiler::IREE::HAL::TargetDeviceList;
+using mlir::iree_compiler::IREE::HAL::TargetRegistry;
 
 #if defined(_MSC_VER)
 #define fileno _fileno
@@ -99,10 +100,14 @@
   // of target backends. However, no such layering exists for the opt tool.
   // Since it tests passes that are default initialized, we just configure the
   // global registry that such constructors depend on.
-  TargetBackendList pluginBackendList;
-  pluginSession.populateHALTargetBackends(pluginBackendList);
-  const_cast<TargetBackendRegistry &>(TargetBackendRegistry::getGlobal())
-      .mergeFrom(pluginBackendList);
+  TargetBackendList pluginTargetBackendList;
+  pluginSession.populateHALTargetBackends(pluginTargetBackendList);
+  const_cast<TargetRegistry &>(TargetRegistry::getGlobal())
+      .mergeFrom(pluginTargetBackendList);
+  TargetDeviceList pluginTargetDeviceList;
+  pluginSession.populateHALTargetDevices(pluginTargetDeviceList);
+  const_cast<TargetRegistry &>(TargetRegistry::getGlobal())
+      .mergeFrom(pluginTargetDeviceList);
 
   // When reading from stdin and the input is a tty, it is often a user mistake
   // and the process "appears to be stuck". Print a message to let the user know

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
index ee8b5b3..0de5488 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @conv_112x112x512 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -64,7 +64,7 @@
   ]>
 ]>
 hal.executable @conv_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -118,7 +118,7 @@
   ]>
 ]>
 hal.executable @conv_16x16x16 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -172,7 +172,7 @@
   ]>
 ]>
 hal.executable @dwconv_28x28x144 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -226,7 +226,7 @@
   ]>
 ]>
 hal.executable @dwconv_4x4x8 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
index ee57bda..e48b5fd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -64,7 +64,7 @@
   ]>
 ]>
 hal.executable @matmul_3136x24x96 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -118,7 +118,7 @@
   ]>
 ]>
 hal.executable @matmul_196x64x192 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -172,7 +172,7 @@
   ]>
 ]>
 hal.executable @matmul_12544x96x16 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -218,7 +218,7 @@
   ]>
 ]>
 hal.executable @matmul_49x160x576 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -270,7 +270,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x384x384 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
@@ -324,7 +324,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x8x8 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
index 66fed52..d0a17c7 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir

@@ -9,7 +9,7 @@
 ]>
 
 hal.executable private @nhwc_conv_pointwise_2x64x64x320 {
-  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
index a2641fe..985f8a8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir

@@ -8,7 +8,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_f32_16x4096x40x4096 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
@@ -52,7 +52,7 @@
   ]>
 ]>
 hal.executable @matmul_f16_64x640x320 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float16], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
@@ -98,7 +98,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_f32_16x4096x40x4096 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
@@ -144,7 +144,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_f16_1x4096x4096x512 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
@@ -197,7 +197,7 @@
   ]>
 ]>
 hal.executable @matmul_multi_reduce_i4xf32xf32 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
index 27b9758..ce4512d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir

@@ -161,7 +161,7 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x8x512x64 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
index 2bcc068..d77a064 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir

@@ -19,7 +19,7 @@
   ]>
 ]>
 hal.executable private @nhwc_conv_pointwise_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -82,7 +82,7 @@
 ]>
 
 hal.executable private @nchw_conv_2x1280x8x8 {
-  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
index 271c36b..5096234 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir

@@ -100,7 +100,7 @@
   ]>
 ]>
 hal.executable private @static_1d_fft_stage2 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirvfb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirvfb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -145,7 +145,7 @@
   ]>
 ]>
 hal.executable private @static_3d_fft_stage3 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirvfb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirvfb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
index 0d7c3ab..e6f33c0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_1x3x32 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -65,7 +65,7 @@
   ]>
 ]>
 hal.executable private @matmul_64x16xi8 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -119,7 +119,7 @@
   ]>
 ]>
 hal.executable private @matmul_64x16xi64 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, Int64], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -173,7 +173,7 @@
   ]>
 ]>
 hal.executable @matmul_400x273 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -238,7 +238,7 @@
   ]>
 ]>
 hal.executable @matmul_25x546 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -312,7 +312,7 @@
   ]>
 ]>
 hal.executable private @matmul_pointwise_256x1024 {
-  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir
index df8a0ca..dc16b19 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir

@@ -9,7 +9,7 @@
   ]>
 ]>
 hal.executable private @complex_executable {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -18,7 +18,7 @@
     }>) {
     hal.executable.export public @complex_view_as_real ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
-      %x, %y, %z = flow.dispatch.workgroup_count_from_slice 
+      %x, %y, %z = flow.dispatch.workgroup_count_from_slice
       hal.return %x, %y, %z : index, index, index
     }
     builtin.module {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
index 8aec774..dee5040 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir

@@ -7,7 +7,7 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce_f32 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -62,7 +62,7 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce_f16 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float16, GroupNonUniformShuffle], []>, Unknown:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
@@ -120,7 +120,7 @@
 ]>
 
 hal.executable private @subgroup_reduce_dynamic {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, GroupNonUniformShuffle], []>, api=Vulkan, Unknown:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 65536,
       max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
index bb24848..5cb4a52 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @conv_112x112x512 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -65,7 +65,7 @@
   ]>
 ]>
 hal.executable @conv_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -119,7 +119,7 @@
   ]>
 ]>
 hal.executable @conv_16x16x16 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -172,7 +172,7 @@
   ]>
 ]>
 hal.executable @dwconv_28x28x144 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -226,7 +226,7 @@
   ]>
 ]>
 hal.executable @dwconv_1x2x8 {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
index 8ea74bd..d7e3f52 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -62,7 +62,7 @@
   ]>
 ]>
 hal.executable @matmul_3136x24x96 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -115,7 +115,7 @@
   ]>
 ]>
 hal.executable @matmul_196x64x192 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -169,7 +169,7 @@
   ]>
 ]>
 hal.executable @matmul_12544x96x16 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -216,7 +216,7 @@
   ]>
 ]>
 hal.executable @matmul_49x160x576 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -271,7 +271,7 @@
 ]>
 
 hal.executable @matmul_2x1024x576 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -327,7 +327,7 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512xi8 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -379,7 +379,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x384x384 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -434,7 +434,7 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x2x8 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -489,7 +489,7 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x2x512 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -548,7 +548,7 @@
 ]>
 
 hal.executable @generic_batch_matmul_8x2500x512x4608 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
index 12cb667..46f5e8e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir

@@ -8,7 +8,7 @@
   ]>
 ]>
 hal.executable @matmul_4x4096x9216 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,
@@ -59,7 +59,7 @@
   ]>
 ]>
 hal.executable @matmul_1x4096x9216 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,
@@ -110,7 +110,7 @@
   ]>
 ]>
 hal.executable private @multi_reduction_transposed_b_matmul {
-  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index 955e525..fbf6010 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir

@@ -173,7 +173,7 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x8x512x64 {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
index edeeda2..9c25f17 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir

@@ -47,7 +47,7 @@
 // -----
 
 hal.executable private @emulate_1d_vector {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export public @emulate_1d_vector ordinal(0)
       layout(#hal.pipeline.layout<push_constants = 0,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
index 5aa906f..ca127da 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir

@@ -14,7 +14,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -50,7 +50,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -87,7 +87,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -124,7 +124,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -161,7 +161,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -198,7 +198,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -235,7 +235,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -272,7 +272,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -574,7 +574,7 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -640,7 +640,7 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -706,7 +706,7 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -772,7 +772,7 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_2d_nhwc_hwc {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
@@ -810,7 +810,7 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_2d_nhwc_hwc {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir
index 7068279..cda0bbb 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir

@@ -6,7 +6,7 @@
 //
 // For such case we can link all executables into one, with just one variant.
 
-#vulkan_target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan"]}>
+#vulkan_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}>
 
 #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -186,10 +186,10 @@
 // For such case we need to link into multiple executables, with each one
 // having one variant containing all entry points needing the same target.
 
-#vulkan_target_0 = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
-  iree.spirv.features = ["vulkan"]}>
-#vulkan_target_1 = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
-  iree.spirv.features = ["vulkan", "subgroup=1"]}>
+#vulkan_target_0 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  iree.spirv.features = ["vulkan-spirv"]}>
+#vulkan_target_1 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}>
 
 #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -308,8 +308,8 @@
   return
 }
 
-//  CHECK-DAG: #[[TARGET0:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan"]}
-//  CHECK-DAG: #[[TARGET1:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "subgroup=1"]}
+//  CHECK-DAG: #[[TARGET0:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}
+//  CHECK-DAG: #[[TARGET1:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}
 
 //      CHECK: hal.executable private @link_executables_linked_spirv_0 {
 //      CHECK:   hal.executable.variant public @vulkan_spirv_fb target(#[[TARGET0]]) {
@@ -377,12 +377,12 @@
 // For such case we can only link two executables together if they have the
 // same set of target requirements.
 
-#vulkan_target_0 = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
-  iree.spirv.features = ["vulkan"]}>
-#vulkan_target_1 = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
-  iree.spirv.features = ["vulkan", "subgroup=1"]}>
-#vulkan_target_2 = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
-  iree.spirv.features = ["vulkan", "subgroup=2"]}>
+#vulkan_target_0 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  iree.spirv.features = ["vulkan-spirv"]}>
+#vulkan_target_1 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}>
+#vulkan_target_2 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  iree.spirv.features = ["vulkan-spirv", "subgroup=2"]}>
 
 #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -547,9 +547,9 @@
   }
 }
 
-//  CHECK-DAG: #[[TARGET0:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan"]}
-//  CHECK-DAG: #[[TARGET1:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "subgroup=1"]}
-//  CHECK-DAG: #[[TARGET2:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "subgroup=2"]}
+//  CHECK-DAG: #[[TARGET0:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}
+//  CHECK-DAG: #[[TARGET1:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}
+//  CHECK-DAG: #[[TARGET2:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "subgroup=2"]}
 
 //      CHECK: hal.executable private @link_executables_linked_spirv {
 //      CHECK:   hal.executable.variant public @vulkan_spirv_fb_0 target(#[[TARGET0]]) {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
index 9ca9fa7..4a8dbf3 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir

@@ -2,7 +2,7 @@
 // RUN:   --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-decompose-softmax)), iree-spirv-select-lowering-strategy-pass, iree-spirv-lower-executable-target-pass)))' \
 // RUN:   %s | FileCheck %s
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<#spirv.vce<v1.3,
     [Shader, GroupNonUniform, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]>, Unknown:Unknown,
     #spirv.resource_limits<max_compute_workgroup_size = [128, 128, 64], subgroup_size = 32, cooperative_matrix_properties_khr = []>>}>
@@ -98,7 +98,7 @@
 
 // -----
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<#spirv.vce<v1.3,
     [Shader, GroupNonUniform, GroupNonUniformShuffle], [SPV_KHR_storage_buffer_storage_class]>, Unknown:Unknown,
     #spirv.resource_limits<max_compute_workgroup_size = [128, 128, 64], subgroup_size = 32>>}>
@@ -191,7 +191,7 @@
 
 // -----
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<#spirv.vce<v1.3,
     [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:Unknown, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
@@ -315,7 +315,7 @@
 ]>
 
 hal.executable private @dynamic_softmax {
-  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6,
         [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, GroupNonUniformShuffle],
         [SPV_KHR_16bit_storage]>, api=Vulkan, Unknown:DiscreteGPU, #spirv.resource_limits<

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
index 5cb0fc5..539e2d4 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir

@@ -1,6 +1,6 @@
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-spirv-select-lowering-strategy-pass, iree-spirv-lower-executable-target-pass)))' -mlir-print-local-scope %s | FileCheck %s
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, Unknown:Unknown,
     #spirv.resource_limits<max_compute_workgroup_size = [128, 128, 64], subgroup_size = 32>>}>
 

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir
index 38f2bba..24f5379 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir

@@ -18,10 +18,10 @@
 
 hal.executable private @dispatch_executable {
   // CHECK-LABEL: hal.executable.variant public @test_assumed_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}>)
   //   CHECK-NOT:   hal.executable.condition
   hal.executable.variant public @test_assumed_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader, GroupNonUniform], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -40,7 +40,7 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_subgroup_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "subgroup.ops=3"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "subgroup.ops=3"]}>)
   //  CHECK-NEXT:   hal.executable.condition(%[[DEV:.+]]: !hal.device) -> i1 {
   //  CHECK-NEXT:   %[[T:.+]] = arith.constant true
   //  CHECK-NEXT:   %[[OK:.+]], %[[V:.+]] = hal.device.query<%[[DEV]] : !hal.device>
@@ -54,7 +54,7 @@
   //  CHECK-NEXT:   hal.return %[[RESULT]] : i1
   //  CHECK-NEXT: }
   hal.executable.variant public @test_subgroup_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [GroupNonUniformShuffle, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -73,7 +73,7 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_8bit_storage_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "storage.bitwidths=1"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "storage.bitwidths=1"]}>)
   //  CHECK-NEXT:   hal.executable.condition(%[[DEV:.+]]: !hal.device) -> i1 {
   //  CHECK-NEXT:   %[[T:.+]] = arith.constant true
   //  CHECK-NEXT:   %[[OK:.+]], %[[V:.+]] = hal.device.query<%[[DEV]] : !hal.device>
@@ -87,7 +87,7 @@
   //  CHECK-NEXT:   hal.return %[[RESULT]] : i1
   //  CHECK-NEXT: }
   hal.executable.variant public @test_8bit_storage_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [UniformAndStorageBuffer8BitAccess, StorageBuffer8BitAccess], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -107,7 +107,7 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_16bit_storage_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "storage.bitwidths=2"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "storage.bitwidths=2"]}>)
   //  CHECK-NEXT:   hal.executable.condition(%[[DEV:.+]]: !hal.device) -> i1 {
   //  CHECK-NEXT:   %[[T:.+]] = arith.constant true
   //  CHECK-NEXT:   %[[OK:.+]], %[[V:.+]] = hal.device.query<%[[DEV]] : !hal.device>
@@ -121,7 +121,7 @@
   //  CHECK-NEXT:   hal.return %[[RESULT]] : i1
   //  CHECK-NEXT: }
   hal.executable.variant public @test_16bit_storage_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [StorageBuffer16BitAccess, StorageUniform16], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -141,13 +141,13 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_int_compute_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "compute.bitwidths.int=7"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "compute.bitwidths.int=7"]}>)
   //       CHECK:   %{{.+}}, %[[V:.+]] = hal.device.query<%{{.+}} : !hal.device>
   //  CHECK-SAME:     key("hal.dispatch" :: "compute.bitwidths.int") : i1, i32 = 0 : i32
   //       CHECK:   %[[TARGET:.+]] = arith.constant 7 : i32
   //       CHECK:   %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
   hal.executable.variant public @test_int_compute_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Int64, Int16, Int8], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -166,13 +166,13 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_float_compute_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "compute.bitwidths.fp=3"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "compute.bitwidths.fp=3"]}>)
   //       CHECK:   %{{.+}}, %[[V:.+]] = hal.device.query<%{{.+}} : !hal.device>
   //  CHECK-SAME:     key("hal.dispatch" :: "compute.bitwidths.fp") : i1, i32 = 0 : i32
   //       CHECK:   %[[TARGET:.+]] = arith.constant 3 : i32
   //       CHECK:   %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
   hal.executable.variant public @test_float_compute_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Float16, Float64], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -191,13 +191,13 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_dot_product_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "dotprod.ops=1"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "dotprod.ops=1"]}>)
   //       CHECK:   %{{.+}}, %[[V:.+]] = hal.device.query<%{{.+}} : !hal.device>
   //  CHECK-SAME:     key("hal.dispatch" :: "dotprod.ops") : i1, i32 = 0 : i32
   //       CHECK:   %[[TARGET:.+]] = arith.constant 1 : i32
   //       CHECK:   %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
   hal.executable.variant public @test_dot_product_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [DotProduct, DotProductInput4x8Bit], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -216,13 +216,13 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_cooperative_matrix_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan", "coopmatrix.ops=1"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv", "coopmatrix.ops=1"]}>)
   //       CHECK:   %{{.+}}, %[[V:.+]] = hal.device.query<%{{.+}} : !hal.device>
   //  CHECK-SAME:     key("hal.dispatch" :: "coopmatrix.ops") : i1, i32 = 0 : i32
   //       CHECK:   %[[TARGET:.+]] = arith.constant 1 : i32
   //       CHECK:   %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
   hal.executable.variant public @test_cooperative_matrix_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [CooperativeMatrixKHR], []>, #spirv.resource_limits<>>
       }>
     ) {
@@ -243,8 +243,8 @@
   }
 
   // CHECK-LABEL: hal.executable.variant public @test_address_capabilities
-  //  CHECK-SAME: target(<"vulkan", "vulkan-spirv-fb-ptr",
-  //  CHECK-SAME:   {hal.bindings.indirect, iree.spirv.features = ["vulkan", "compute.bitwidths.int=4", "address.mode=1"]}>)
+  //  CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb-ptr",
+  //  CHECK-SAME:   {hal.bindings.indirect, iree.spirv.features = ["vulkan-spirv", "compute.bitwidths.int=4", "address.mode=1"]}>)
   //       CHECK:   %{{.+}}, %[[V0:.+]] = hal.device.query<%{{.+}} : !hal.device>
   //  CHECK-SAME:     key("hal.dispatch" :: "compute.bitwidths.int") : i1, i32 = 0 : i32
   //       CHECK:   %[[TARGET0:.+]] = arith.constant 4 : i32
@@ -254,7 +254,7 @@
   //       CHECK:   %[[TARGET1:.+]] = arith.constant 1 : i32
   //       CHECK:   %{{.+}} = arith.andi %[[V1]], %[[TARGET1]] : i32
   hal.executable.variant public @test_address_capabilities target(
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb-ptr", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb-ptr", {
         spirv.target_env = #spirv.target_env<#spirv.vce<v1.5,
                                                         [Int64, PhysicalStorageBufferAddresses],
                                                         [SPV_KHR_physical_storage_buffer]>,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index c3bbe30..a3ed5b8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir

@@ -8,7 +8,7 @@
   ]>
 ]>
 hal.executable private @fuse_and_vectorize_fill_matmul {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -59,7 +59,7 @@
   ]>
 ]>
 hal.executable private @fuse_and_vectorize_matmul_add {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
index 34129f7..4407edd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir

@@ -7,7 +7,7 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
@@ -105,7 +105,7 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
index 2f4b364..93abfa0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable private @static_scatter_update_slice  {
-  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb">) {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @static_scatter_update_slice layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [16 : index, 1 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir
index 206a931..e84dacb 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir

@@ -1,13 +1,13 @@
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-spirv-trim-executable-target-env)))' %s | FileCheck %s
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, GroupNonUniformArithmetic],
                                       [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>,
                                       api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>}>
 
 
-// CHECK-DAG: #[[$TARGET0:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
-// CHECK-DAG: #[[$TARGET1:.+]] = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, GroupNonUniformArithmetic], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
+// CHECK-DAG: #[[$TARGET0:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
+// CHECK-DAG: #[[$TARGET1:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, GroupNonUniformArithmetic], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
 
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 

diff --git a/compiler/src/iree/compiler/ConstEval/BUILD.bazel b/compiler/src/iree/compiler/ConstEval/BUILD.bazel
index 998b2c2..e884b6f 100644
--- a/compiler/src/iree/compiler/ConstEval/BUILD.bazel
+++ b/compiler/src/iree/compiler/ConstEval/BUILD.bazel

@@ -54,6 +54,7 @@
         ":PassHeaders",
         ":PassesIncGen",
         ":Runtime",
+        "//compiler/src/iree/compiler/Dialect/HAL/Target",
         "//compiler/src/iree/compiler/Pipelines",
         "//compiler/src/iree/compiler/Utils",
         "@llvm-project//llvm:Support",

diff --git a/compiler/src/iree/compiler/ConstEval/CMakeLists.txt b/compiler/src/iree/compiler/ConstEval/CMakeLists.txt
index f8679bd..3b675fa 100644
--- a/compiler/src/iree/compiler/ConstEval/CMakeLists.txt
+++ b/compiler/src/iree/compiler/ConstEval/CMakeLists.txt

@@ -51,6 +51,7 @@
     MLIRFunctionInterfaces
     MLIRIR
     MLIRPass
+    iree::compiler::Dialect::HAL::Target
     iree::compiler::Pipelines
     iree::compiler::Utils
   PUBLIC

diff --git a/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp b/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp
index ad398f9..86f45eb 100644
--- a/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp
+++ b/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp

@@ -28,9 +28,9 @@
 
 namespace mlir::iree_compiler::ConstEval {
 
-static llvm::cl::opt<std::string> clJitTargetBackend(
-    "iree-consteval-jit-target-backend",
-    llvm::cl::desc("Overrides the target backend used for JIT'ing."),
+static llvm::cl::opt<std::string> clJitTargetDevice(
+    "iree-consteval-jit-target-device",
+    llvm::cl::desc("Overrides the target device used for JIT'ing."),
     llvm::cl::init(""));
 
 static llvm::cl::opt<bool> clEnableDebug(
@@ -314,34 +314,37 @@
 };
 
 struct JitGlobalsPass : public JitGlobalsBase<JitGlobalsPass> {
-  JitGlobalsPass(const IREE::HAL::TargetBackendRegistry &targetRegistry)
-      : options(std::make_shared<CompileOptions>()),
+  JitGlobalsPass(const JitGlobalsOptions &options)
+      : compileOptions(std::make_shared<CompileOptions>()),
         compilePipeline("builtin.module") {
+    targetRegistry = options.targetRegistry;
+
     // Detect backend.
-    requestedTargetBackend = resolveTargetBackend(targetRegistry);
-    hasRequestedTargetBackend =
-        targetRegistry.getTargetBackend(requestedTargetBackend) != nullptr;
-    options->executableOptions.targets.push_back(requestedTargetBackend);
-    options->targetOptions.f32Extension = true;
-    options->targetOptions.f64Extension = true;
-    options->targetOptions.truncateUnsupportedFloats = false;
-    if (requestedTargetBackend == "vmvx" || !hasRequestedTargetBackend) {
-      targetBackend = targetRegistry.getTargetBackend("vmvx");
+    requestedTargetDevice = resolveTargetDevice(*targetRegistry.value);
+    hasRequestedTargetDevice =
+        targetRegistry->getTargetDevice(requestedTargetDevice) != nullptr;
+    compileOptions->executableOptions.targets.push_back(requestedTargetDevice);
+    compileOptions->targetOptions.f32Extension = true;
+    compileOptions->targetOptions.f64Extension = true;
+    compileOptions->targetOptions.truncateUnsupportedFloats = false;
+    if (requestedTargetDevice == "vmvx" || !hasRequestedTargetDevice) {
+      targetDevice = targetRegistry->getTargetDevice("vmvx");
     } else {
-      targetBackend = targetRegistry.getTargetBackend(requestedTargetBackend);
+      targetDevice = targetRegistry->getTargetDevice(requestedTargetDevice);
     }
 
     // Disable constant evaluation for our Jit compilation pipeline.
     // It would make no sense to recursively do constant evaluation, and since
     // we omit the necessary hooks, it is unsupported anyway.
-    options->globalOptimizationOptions.constExprHoisting = false;
-    options->globalOptimizationOptions.constEval = false;
+    compileOptions->globalOptimizationOptions.constExprHoisting = false;
+    compileOptions->globalOptimizationOptions.constEval = false;
 
     buildIREEVMTransformPassPipeline(
-        targetRegistry, options->bindingOptions, options->inputOptions,
-        options->preprocessingOptions, options->globalOptimizationOptions,
-        options->schedulingOptions, options->executableOptions,
-        options->targetOptions, options->hooks, compilePipeline);
+        *targetRegistry.value, compileOptions->bindingOptions,
+        compileOptions->inputOptions, compileOptions->preprocessingOptions,
+        compileOptions->globalOptimizationOptions,
+        compileOptions->schedulingOptions, compileOptions->executableOptions,
+        compileOptions->targetOptions, compileOptions->hooks, compilePipeline);
   }
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -349,18 +352,18 @@
   }
 
   static std::string
-  resolveTargetBackend(const IREE::HAL::TargetBackendRegistry &targetRegistry) {
-    if (clJitTargetBackend.empty()) {
+  resolveTargetDevice(const IREE::HAL::TargetRegistry &targetRegistry) {
+    if (clJitTargetDevice.empty()) {
       // Default - choose something we have.
       // First llvm-cpu then vmvx.
-      if (targetRegistry.getTargetBackend("llvm-cpu")) {
+      if (targetRegistry.getTargetDevice("llvm-cpu")) {
         return std::string("llvm-cpu");
       } else {
         return std::string("vmvx");
       }
     }
 
-    return clJitTargetBackend;
+    return clJitTargetDevice;
   }
 
   const SupportedFeatures getSupportedFeatures(MLIRContext *context) {
@@ -371,7 +374,7 @@
     // the `eval_i4_tensor` test in `jit_globals.mlir` to fail.
     // TODO(#16321): Enable on other backends once this has been tested
     // outside llvm-cpu.
-    if (requestedTargetBackend == "llvm-cpu" && hasRequestedTargetBackend)
+    if (requestedTargetDevice == "llvm-cpu" && hasRequestedTargetDevice)
       s.addScalarType(b.getIntegerType(4));
     s.addScalarType(b.getIntegerType(8));
     s.addScalarType(b.getIntegerType(16));
@@ -383,14 +386,14 @@
 
     // TODO(#16321): Enable on other backends once this has been tested outside
     // llvm-cpu.
-    if (requestedTargetBackend == "llvm-cpu" && hasRequestedTargetBackend)
+    if (requestedTargetDevice == "llvm-cpu" && hasRequestedTargetDevice)
       s.addElementType(b.getIntegerType(4));
     s.addElementType(b.getIntegerType(8));
     s.addElementType(b.getIntegerType(16));
     s.addElementType(b.getIntegerType(32));
     s.addElementType(b.getIntegerType(64));
     s.addElementType(b.getF32Type());
-    if (requestedTargetBackend != "vmvx" && hasRequestedTargetBackend) {
+    if (requestedTargetDevice != "vmvx" && hasRequestedTargetDevice) {
       // The full compilers support additional types.
       // TODO: Enable support for i4 once it is worked out how to
       // transfer to and from ElementsAttr.
@@ -474,15 +477,15 @@
     llvm::TimerGroup tg("iree-consteval-jit", "Consteval Jit");
     auto outerModule = getOperation();
     auto supportedFeatures = getSupportedFeatures(&getContext());
-    if (!hasRequestedTargetBackend) {
+    if (!hasRequestedTargetDevice) {
       emitWarning(UnknownLoc::get(&getContext()))
-          << "consteval jit requested with " << requestedTargetBackend
+          << "consteval jit requested with " << requestedTargetDevice
           << " backend, but it is not available. Falling back to vmvx";
     }
-    if (!targetBackend) {
+    if (!targetDevice) {
       emitError(UnknownLoc::get(&getContext()))
           << "consteval jit could not find a usable backend (requested '"
-          << requestedTargetBackend << "')";
+          << requestedTargetDevice << "')";
       signalPassFailure();
       return;
     }
@@ -498,11 +501,11 @@
 
     // Set the target.
     std::optional<IREE::HAL::DeviceTargetAttr> targetAttr =
-        targetBackend->getHostDeviceTarget(&getContext());
+        targetDevice->getHostDeviceTarget(&getContext(), *targetRegistry.value);
     {
       if (!targetAttr) {
         emitError(UnknownLoc::get(&getContext()))
-            << "consteval requested backend " << requestedTargetBackend
+            << "consteval requested backend " << requestedTargetDevice
             << " cannot target the host";
         signalPassFailure();
         return;
@@ -531,7 +534,7 @@
 
     std::optional<llvm::Timer> compileTimer;
     if (debugEnabled) {
-      dbgs() << "::: COMPILING JIT (" << requestedTargetBackend
+      dbgs() << "::: COMPILING JIT (" << requestedTargetDevice
              << "): " << programBuilder.getTargetModule() << "\n";
       compileTimer.emplace("iree-consteval-jit-compile", "Compiling", tg);
       compileTimer->startTimer();
@@ -568,24 +571,23 @@
     }
   }
 
-  std::shared_ptr<CompileOptions> options;
+  std::shared_ptr<CompileOptions> compileOptions;
   OpPassManager compilePipeline;
-  std::string requestedTargetBackend;
-  std::shared_ptr<IREE::HAL::TargetBackend> targetBackend;
-  bool hasRequestedTargetBackend;
+  std::string requestedTargetDevice;
+  std::shared_ptr<IREE::HAL::TargetDevice> targetDevice;
+  bool hasRequestedTargetDevice;
   bool debugEnabled = isDebugEnabled();
 };
 
 } // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>>
-createJitGlobalsPass(const IREE::HAL::TargetBackendRegistry &targetRegistry) {
-  return std::make_unique<JitGlobalsPass>(targetRegistry);
+createJitGlobalsPass(const JitGlobalsOptions &options) {
+  return std::make_unique<JitGlobalsPass>(options);
 }
 
 std::unique_ptr<OperationPass<ModuleOp>> createJitGlobalsPass() {
-  return std::make_unique<JitGlobalsPass>(
-      IREE::HAL::TargetBackendRegistry::getGlobal());
+  return std::make_unique<JitGlobalsPass>(JitGlobalsOptions{});
 }
 
 } // namespace mlir::iree_compiler::ConstEval

diff --git a/compiler/src/iree/compiler/ConstEval/PassDetail.h b/compiler/src/iree/compiler/ConstEval/PassDetail.h
index c88578d..e1eb95d 100644
--- a/compiler/src/iree/compiler/ConstEval/PassDetail.h
+++ b/compiler/src/iree/compiler/ConstEval/PassDetail.h

@@ -7,6 +7,7 @@
 #ifndef IREE_COMPILER_CONSTEVAL_PASSDETAIL_H_
 #define IREE_COMPILER_CONSTEVAL_PASSDETAIL_H_
 
+#include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 

diff --git a/compiler/src/iree/compiler/ConstEval/Passes.h b/compiler/src/iree/compiler/ConstEval/Passes.h
index ac8ebd3..a61b19d 100644
--- a/compiler/src/iree/compiler/ConstEval/Passes.h
+++ b/compiler/src/iree/compiler/ConstEval/Passes.h

@@ -7,20 +7,20 @@
 #ifndef IREE_COMPILER_CONSTEVAL_PASSES_H_
 #define IREE_COMPILER_CONSTEVAL_PASSES_H_
 
+#include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 
-namespace mlir::iree_compiler::IREE::HAL {
-class TargetBackendRegistry;
-} // namespace mlir::iree_compiler::IREE::HAL
-
 namespace mlir::iree_compiler::ConstEval {
 
+#define GEN_PASS_DECL
+#include "iree/compiler/ConstEval/Passes.h.inc"
+
 /// Creates a pass which uses the compiler and runtime to Jit global
 /// initializers eligible for optimization and uses the actual results to
 /// simplify the globals in the module.
 std::unique_ptr<OperationPass<ModuleOp>>
-createJitGlobalsPass(const IREE::HAL::TargetBackendRegistry &targetRegistry);
+createJitGlobalsPass(const JitGlobalsOptions &options);
 
 // Creates with the global target registry (for opt and such). This
 // may only have access to the VMVX backend.

diff --git a/compiler/src/iree/compiler/ConstEval/Passes.td b/compiler/src/iree/compiler/ConstEval/Passes.td
index 1ddbbbc..8dc7a21 100644
--- a/compiler/src/iree/compiler/ConstEval/Passes.td
+++ b/compiler/src/iree/compiler/ConstEval/Passes.td

@@ -13,6 +13,13 @@
   Pass<"iree-consteval-jit-globals", "ModuleOp"> {
   let summary = "Jits global initializers and evaluates them into concrete values";
   let constructor = "mlir::iree_compiler::ConstEval::createJitGlobalsPass()";
+  let options = [
+    Option<
+      "targetRegistry", "target-registry",
+      "llvm::cl::TargetRegistryRef", "",
+      "Target backend registry containing the list of available backends."
+    >,
+  ];
 }
 
 #endif // IREE_COMPILER_JITEVAL_PASSES

diff --git a/compiler/src/iree/compiler/ConstEval/test/failing.mlir b/compiler/src/iree/compiler/ConstEval/test/failing.mlir
index 5fabbeb..daeb44e 100644
--- a/compiler/src/iree/compiler/ConstEval/test/failing.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/failing.mlir

@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-consteval-jit-target-backend=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
+// RUN: iree-opt --split-input-file --iree-consteval-jit-target-device=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
 // XFAIL: *
 
 // CHECK-LABEL: @eval_f64_scalar

diff --git a/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir b/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir
index 13429e8..ef3ed50 100644
--- a/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir

@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-consteval-jit-target-backend=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
+// RUN: iree-opt --split-input-file --iree-consteval-jit-target-device=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
 
 // TODO(laurenzo): Full type matrix for tests.
 

diff --git a/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir b/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir
index 734165f..c84ae10 100644
--- a/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir

@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-consteval-jit-target-backend=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
+// RUN: iree-opt --split-input-file --iree-consteval-jit-target-device=vmvx --verify-diagnostics --iree-consteval-jit-debug --iree-consteval-jit-globals  %s | FileCheck %s
 
 // CHECK-LABEL: @eval_i8_scalar
 // CHECK: 42 : i8

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Target/BUILD.bazel
index 655eb9a..98f74df 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/BUILD.bazel

@@ -16,10 +16,12 @@
     name = "Target",
     srcs = [
         "TargetBackend.cpp",
+        "TargetDevice.cpp",
         "TargetRegistry.cpp",
     ],
     hdrs = [
         "TargetBackend.h",
+        "TargetDevice.h",
         "TargetRegistry.h",
     ],
     deps = [

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Target/CMakeLists.txt
index 12aca6d..2143777 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/CMakeLists.txt

@@ -15,9 +15,11 @@
     Target
   HDRS
     "TargetBackend.h"
+    "TargetDevice.h"
     "TargetRegistry.h"
   SRCS
     "TargetBackend.cpp"
+    "TargetDevice.cpp"
     "TargetRegistry.cpp"
   DEPS
     LLVMSupport

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/BUILD.bazel
index 301f71b..1f2f99a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/BUILD.bazel

@@ -108,6 +108,7 @@
         "LLVMTargetOptions.h",
     ],
     deps = [
+        "@llvm-project//llvm:Analysis",
         "@llvm-project//llvm:MC",
         "@llvm-project//llvm:Passes",
         "@llvm-project//llvm:Support",

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/CMakeLists.txt
index 7fb7e65..d3147f9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/CMakeLists.txt

@@ -89,6 +89,7 @@
   SRCS
     "LLVMTargetOptions.cpp"
   DEPS
+    LLVMAnalysis
     LLVMMC
     LLVMPasses
     LLVMSupport

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp
index b3dba8e..f3b5311 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp

@@ -24,7 +24,6 @@
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h"
 #include "iree/compiler/Utils/ModuleUtils.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/IR/GlobalValue.h"
@@ -46,27 +45,23 @@
 #define DEBUG_TYPE "iree-llvm-cpu-target"
 using llvm::dbgs;
 
-static llvm::cl::opt<std::string> clEnableCPUUkernels(
-    "iree-llvmcpu-enable-ukernels",
-    llvm::cl::desc("Enables microkernels in the llvmcpu backend. May be "
-                   "`default`, `none`, `all`, or a comma-separated list of "
-                   "specific unprefixed microkernels to enable, e.g. `mmt4d`."),
-    llvm::cl::init("default"));
-
-static llvm::cl::opt<bool> clLinkCPUUKernelBitcode(
-    "iree-llvmcpu-link-ukernel-bitcode",
-    llvm::cl::desc("Link ukernel bitcode libraries into generated executables"),
-    llvm::cl::init(true));
-
-static llvm::cl::opt<unsigned> clNativeVectorWidthInBytes(
-    "iree-llvmcpu-native-vector-width-in-bytes",
-    llvm::cl::desc("sets the native vector register width of the hardware. It "
-                   "overrides any inferred vector register width"),
-    llvm::cl::init(0));
-
-// Default native vector width when target or specific native vector width are
-// not provided.
-constexpr unsigned defaultNativeVectorWidth = 16;
+//===----------------------------------------------------------------------===//
+//    __    __   ___________    ____     ____    ____  ______    __    __     //
+//   |  |  |  | |   ____\   \  /   /     \   \  /   / /  __  \  |  |  |  |    //
+//   |  |__|  | |  |__   \   \/   /       \   \/   / |  |  |  | |  |  |  |    //
+//   |   __   | |   __|   \_    _/         \_    _/  |  |  |  | |  |  |  |    //
+//   |  |  |  | |  |____    |  |   __        |  |    |  `--'  | |  `--'  |    //
+//   |__|  |__| |_______|   |__|  (_ )       |__|     \______/   \______/     //
+//                                 |/                                         //
+//===----------------------------------------------------------------------===//
+//
+// Do _not_ add command-line flags here: IREE is a cross-compiler and can
+// compile for multiple targets in a single invocation. Global flags added here
+// apply to all targets with no way to override them from hosting applications
+// that may need to programmatically set them per target and that's bad.
+//
+// Flags *must* be added to the LLVMTarget if they are target-specific and
+// LLVMTargetOptions if they are apply to the whole backend.
 
 namespace mlir::iree_compiler::IREE::HAL {
 
@@ -143,19 +138,123 @@
   return success();
 }
 
-class LLVMCPUTargetBackend final : public TargetBackend {
+class LLVMCPUTargetDevice final : public TargetDevice {
 public:
-  struct AdditionalConfigurationValues {
-    std::string dataLayoutStr;
-    int64_t vectorSize;
-  };
+  LLVMCPUTargetDevice() = default;
 
-  explicit LLVMCPUTargetBackend(LLVMTargetOptions options)
-      : defaultOptions_(std::move(options)) {
-    initializeAdditionalConfiguration(defaultOptions_);
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+
+    auto configAttr = b.getDictionaryAttr(configItems);
+
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("llvm-cpu")
+        ->getDefaultExecutableTargets(context, "llvm-cpu", configAttr,
+                                      executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context,
+                                            b.getStringAttr("llvm-cpu"),
+                                            configAttr, executableTargetAttrs);
   }
 
-  std::string name() const override { return "llvm-cpu"; }
+  std::optional<IREE::HAL::DeviceTargetAttr>
+  getHostDeviceTarget(MLIRContext *context,
+                      const TargetRegistry &targetRegistry) const override {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+
+    auto configAttr = b.getDictionaryAttr(configItems);
+
+    // If we had multiple target environments we would generate one target attr
+    // per environment, with each setting its own environment attribute.
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("llvm-cpu")
+        ->getHostExecutableTargets(context, "llvm-cpu", configAttr,
+                                   executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context,
+                                            b.getStringAttr("llvm-cpu"),
+                                            configAttr, executableTargetAttrs);
+  }
+};
+
+class LLVMCPUTargetBackend final : public TargetBackend {
+public:
+  explicit LLVMCPUTargetBackend(LLVMTargetOptions options)
+      : defaultOptions_(std::move(options)) {}
+
+  std::string getLegacyDefaultDeviceID() const override { return "llvm-cpu"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
+    executableTargetAttrs.push_back(
+        getExecutableTarget(context, defaultOptions_.target));
+  }
+
+  void getHostExecutableTargets(MLIRContext *context, StringRef deviceID,
+                                DictionaryAttr deviceConfigAttr,
+                                SmallVectorImpl<IREE::HAL::ExecutableTargetAttr>
+                                    &executableTargetAttrs) const override {
+    std::optional<LLVMTarget> maybeTarget = LLVMTarget::createForHost();
+    if (maybeTarget) {
+      executableTargetAttrs.push_back(
+          getExecutableTarget(context, *maybeTarget));
+    }
+  }
+
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context, const LLVMTarget &target) const {
+    // Add some configurations to the `hal.executable.target` attribute.
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    target.storeToConfigAttrs(context, configItems);
+
+    // Compute the format used at runtime to select the executable loader.
+    std::string format;
+    if (target.linkStatic) {
+      // Static libraries are just string references when serialized so we don't
+      // need to specify the target architecture.
+      format += "static";
+    } else {
+      // Construct the [loader]-[format]-[arch] triple.
+      llvm::Triple targetTriple(target.getTriple());
+      if (target.getLinkEmbedded()) {
+        // Using the IREE embedded ELF format/loader.
+        format += "embedded-elf-";
+      } else {
+        // System-specific shared library format.
+        format += "system-";
+        switch (targetTriple.getObjectFormat()) {
+        case llvm::Triple::ObjectFormatType::COFF:
+          format += "dll-";
+          break;
+        case llvm::Triple::ObjectFormatType::ELF:
+          format += "elf-";
+          break;
+        case llvm::Triple::ObjectFormatType::MachO:
+          format += "dylib-";
+          break;
+        case llvm::Triple::ObjectFormatType::Wasm:
+          format += "wasm-";
+          break;
+        default:
+          format += "unknown-";
+          break;
+        }
+      }
+      format += getIreeArchNameForTargetTriple(targetTriple);
+    }
+    return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
+        b.getStringAttr("llvm-cpu"), b.getStringAttr(format),
+        b.getDictionaryAttr(configItems));
+  }
 
   void getDependentDialects(DialectRegistry &registry) const override {
     mlir::registerBuiltinDialectTranslation(registry);
@@ -173,38 +272,6 @@
     // clang-format on
   }
 
-  IREE::HAL::DeviceTargetAttr getDeviceTargetFromTarget(
-      MLIRContext *context, const LLVMTarget &target,
-      const AdditionalConfigurationValues &addlConfig) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-
-    // If we had multiple target environments we would generate one target attr
-    // per environment, with each setting its own environment attribute.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
-    targetAttrs.push_back(getExecutableTarget(context, target, addlConfig));
-
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
-  }
-
-  IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
-    return getDeviceTargetFromTarget(context, defaultOptions_.target,
-                                     defaultAddlConfig_);
-  }
-
-  std::optional<IREE::HAL::DeviceTargetAttr>
-  getHostDeviceTarget(MLIRContext *context) const override {
-    std::optional<LLVMTarget> maybeTarget = LLVMTarget::createForHost();
-    if (!maybeTarget) {
-      return {};
-    }
-    return getDeviceTargetFromTarget(context, *maybeTarget, defaultAddlConfig_);
-  }
-
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
                                       OpPassManager &passManager) override {
     buildLLVMCPUCodegenConfigurationPassPipeline(passManager);
@@ -486,7 +553,7 @@
       }
     }
 
-    if (clLinkCPUUKernelBitcode) {
+    if (target.linkUkernelBitcode) {
       // Link in ukernel bitcode.
       if (hasUkernel(variantOp.getTarget())) {
         llvm::Expected<std::unique_ptr<llvm::Module>> bitcode =
@@ -768,141 +835,17 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context, const LLVMTarget &target,
-                      const AdditionalConfigurationValues &addlConfig) const {
-    // Add some configurations to the `hal.executable.target` attribute.
-    Builder b(context);
-    SmallVector<NamedAttribute> configAttrs;
-    target.storeToConfigAttrs(context, configAttrs);
-
-    // Compute the format.
-    std::string format;
-    if (target.linkStatic) {
-      // Static libraries are just string references when serialized so we don't
-      // need to specify the target architecture.
-      format += "static";
-    } else {
-      // Construct the [loader]-[format]-[arch] triple.
-      llvm::Triple targetTriple(target.getTriple());
-      if (target.getLinkEmbedded()) {
-        // Using the IREE embedded ELF format/loader.
-        format += "embedded-elf-";
-      } else {
-        // System-specific shared library format.
-        format += "system-";
-        switch (targetTriple.getObjectFormat()) {
-        case llvm::Triple::ObjectFormatType::COFF:
-          format += "dll-";
-          break;
-        case llvm::Triple::ObjectFormatType::ELF:
-          format += "elf-";
-          break;
-        case llvm::Triple::ObjectFormatType::MachO:
-          format += "dylib-";
-          break;
-        case llvm::Triple::ObjectFormatType::Wasm:
-          format += "wasm-";
-          break;
-        default:
-          format += "unknown-";
-          break;
-        }
-      }
-      format += getIreeArchNameForTargetTriple(targetTriple);
-    }
-
-    // Additional configuration items that are used in various places.
-    // Note: Some of these look to not be isolated properly for
-    // cross-compilation, likely requiring a closer look.
-    // Set data layout
-    configAttrs.emplace_back(b.getStringAttr("data_layout"),
-                             b.getStringAttr(addlConfig.dataLayoutStr));
-    // Set the native vector size. This creates a dummy llvm module just to
-    // build the TTI the right way.
-    configAttrs.emplace_back(b.getStringAttr("native_vector_size"),
-                             b.getIndexAttr(addlConfig.vectorSize));
-
-    std::string enableUkernels = clEnableCPUUkernels.getValue();
-    // Check if microkernels are to be enabled.
-    configAttrs.emplace_back(b.getStringAttr("ukernels"),
-                             b.getStringAttr(enableUkernels));
-
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, StringAttr::get(context, "llvm-cpu"),
-        StringAttr::get(context, format),
-        DictionaryAttr::get(context, configAttrs));
-  }
-
-  void initializeAdditionalConfiguration(const LLVMTargetOptions &options) {
-    auto targetMachine = createTargetMachine(options.target);
-    // TODO(#13988): proper error propagation. This is a common user scenario.
-    assert(targetMachine && "createTargetMachine failed");
-
-    // Data layout
-    llvm::DataLayout DL = targetMachine->createDataLayout();
-    defaultAddlConfig_.dataLayoutStr = DL.getStringRepresentation();
-
-    // Set the native vector size. This creates a dummy llvm module just to
-    // build the TTI the right way.
-    llvm::LLVMContext llvmContext;
-    auto llvmModule =
-        std::make_unique<llvm::Module>("dummy_module", llvmContext);
-    llvm::Type *voidType = llvm::Type::getVoidTy(llvmContext);
-    llvmModule->setDataLayout(DL);
-    llvm::Function *dummyFunc = llvm::Function::Create(
-        llvm::FunctionType::get(voidType, false),
-        llvm::GlobalValue::ExternalLinkage, "dummy_func", *llvmModule);
-
-    // If target supports AVX-512, enforce 512-bit vector registers.
-    llvm::StringRef targetFeatures = targetMachine->getTargetFeatureString();
-    if (targetFeatures.contains("avx512")) {
-      dummyFunc->addFnAttr("prefer-vector-width", "512");
-    }
-
-    llvm::TargetTransformInfo tti =
-        targetMachine->getTargetTransformInfo(*dummyFunc);
-
-    // Set the native vector width. We prioritize user-specified widths over
-    // widths provided by TTI.
-    if (clNativeVectorWidthInBytes) {
-      defaultAddlConfig_.vectorSize = clNativeVectorWidthInBytes;
-    } else {
-      unsigned ttiVectorWidth =
-          tti.getRegisterBitWidth(
-              llvm::TargetTransformInfo::RGK_FixedWidthVector) /
-          8;
-      defaultAddlConfig_.vectorSize =
-          ttiVectorWidth > 1 ? ttiVectorWidth : defaultNativeVectorWidth;
-    }
-
-    LLVM_DEBUG({
-      llvm::dbgs() << "CPU : " << targetMachine->getTargetCPU() << "\n";
-      llvm::dbgs() << "Target Triple : "
-                   << targetMachine->getTargetTriple().normalize() << "\n";
-      llvm::dbgs() << "Target Feature string : " << targetFeatures << "\n";
-      llvm::dbgs() << "Data Layout : " << defaultAddlConfig_.dataLayoutStr
-                   << "\n";
-      llvm::dbgs() << "Vector Width : " << defaultAddlConfig_.vectorSize
-                   << "\n";
-    });
-  }
-
   // Default options as registered from the command line. Should not be
   // relied on outside of getDefaultDeviceTarget() since it represents
   // a static "cross compiling" config and would override more specific
   // settings.
   LLVMTargetOptions defaultOptions_;
-
-  // Additional target information besides that is contained in
-  // LLVMTargetOptions defaultOptions_.
-  AdditionalConfigurationValues defaultAddlConfig_;
 };
 
 void registerLLVMCPUTargetBackends(
     std::function<LLVMTargetOptions()> queryOptions) {
   // Make sure flags are registered.
-  LLVMTargetOptions::getFromFlags();
+  LLVMTargetOptions::registerFlags();
 
 // Dynamically do preprocessor dispatch to initialize only targets that we
 // care about if they are enabled. Unfortunately, the way the LLVM macros
@@ -950,13 +893,14 @@
 #define LLVM_TARGET(TargetName) LLVM_INITIALIZE_TARGET_##TargetName()
 #include "llvm/Config/Targets.def"
 
-  auto backendFactory = [=]() {
-    return std::make_shared<LLVMCPUTargetBackend>(queryOptions());
-  };
-
+  // TODO(benvanik): move to a CPU device registration outside of LLVM.
   // #hal.device.target<"llvm-cpu", ...
+  static TargetDeviceRegistration registration0(
+      "llvm-cpu", [=]() { return std::make_shared<LLVMCPUTargetDevice>(); });
   // #hal.executable.target<"llvm-cpu", ...
-  static TargetBackendRegistration registration("llvm-cpu", backendFactory);
+  static TargetBackendRegistration registration1("llvm-cpu", [=]() {
+    return std::make_shared<LLVMCPUTargetBackend>(queryOptions());
+  });
 }
 
 } // namespace mlir::iree_compiler::IREE::HAL

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp
index 26464e0..f28bbf7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp

@@ -24,22 +24,6 @@
 
 namespace mlir::iree_compiler::IREE::HAL {
 
-std::unique_ptr<llvm::TargetMachine>
-createTargetMachine(const LLVMTarget &target) {
-  std::string errorMessage;
-  auto llvmTarget =
-      llvm::TargetRegistry::lookupTarget(target.getTriple(), errorMessage);
-  if (!llvmTarget)
-    return nullptr;
-  std::unique_ptr<llvm::TargetMachine> machine(llvmTarget->createTargetMachine(
-      target.getTriple(), target.getCpu() /* cpu e.g k8 */,
-      target.getCpuFeatures() /* cpu features e.g avx512f */,
-      target.llvmTargetOptions, llvm::Reloc::Model::PIC_, {},
-      target.codeGenOptLevel,
-      /*JIT=*/false));
-  return machine;
-}
-
 LogicalResult runLLVMIRPasses(const LLVMTarget &target,
                               llvm::TargetMachine *machine,
                               llvm::Module *module) {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h
index b456579..5a516fe 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h

@@ -16,10 +16,6 @@
 
 namespace mlir::iree_compiler::IREE::HAL {
 
-// Creates target machine form target options.
-std::unique_ptr<llvm::TargetMachine>
-createTargetMachine(const LLVMTarget &target);
-
 // Creates and runs LLVMIR optimization passes defined in LLVMTargetOptions.
 LogicalResult runLLVMIRPasses(const LLVMTarget &target,
                               llvm::TargetMachine *machine,

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp
index 0a01b11..7c36be9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp

@@ -10,9 +10,11 @@
 
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
@@ -141,19 +143,26 @@
     // but we need to continue to avoid breaking existing users. Hopefully
     // resolveCPUAndCPUFeatures logged a helpful error already.
   }
+
   return target;
 }
 
 std::optional<LLVMTarget> LLVMTarget::createForHost() {
-  return LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host",
-                            /*cpuFeatures=*/"host",
-                            /*requestLinkEmbedded=*/true);
+  auto target =
+      LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host",
+                         /*cpuFeatures=*/"host",
+                         /*requestLinkEmbedded=*/true);
+  if (target)
+    target->populateDefaultsFromTargetMachine();
+  return target;
 }
 
 void LLVMTarget::print(llvm::raw_ostream &os) const {
   os << "LLVMTarget{\n"
      << "  triple=" << triple << ", cpu=" << cpu
      << ", cpuFeatures=" << cpuFeatures << "\n"
+     << "  dataLayout=" << dataLayout << "\n"
+     << "  vectorWidthInBytes=" << vectorWidthInBytes << "\n"
      << "  linkEmbedded=" << linkEmbedded << "\n"
      << "  debugSymbols=" << debugSymbols << "\n"
      << "  sanitizer=" << static_cast<int>(sanitizerKind) << "\n"
@@ -171,6 +180,8 @@
      << "    FloatABIType=" << static_cast<int>(llvmTargetOptions.FloatABIType)
      << "\n"
      << "  }\n"
+     << "  ukernels=" << ukernels << "\n"
+     << "  linkUkernelBitcode=" << linkUkernelBitcode << "\n"
      << "}\n";
 }
 
@@ -183,10 +194,19 @@
   auto addBool = [&](StringRef name, bool value) {
     config.emplace_back(b.getStringAttr(name), b.getBoolAttr(value));
   };
+  auto addInt64 = [&](StringRef name, int64_t value) {
+    config.emplace_back(b.getStringAttr(name), b.getI64IntegerAttr(value));
+  };
 
   addString("target_triple", triple);
   addString("cpu", cpu);
   addString("cpu_features", cpuFeatures);
+  if (!dataLayout.empty()) {
+    addString("data_layout", dataLayout);
+  }
+  if (vectorWidthInBytes != DEFAULT_VECTOR_WIDTH_IN_BYTES) {
+    addInt64("native_vector_size", vectorWidthInBytes);
+  }
   if (linkEmbedded != DEFAULT_LINK_EMBEDDED) {
     addBool("link_embedded", linkEmbedded);
   }
@@ -235,6 +255,10 @@
       break;
     }
   }
+  if (ukernels.compare(DEFAULT_ENABLE_UKERNELS) != 0)
+    addString("ukernels", ukernels);
+  if (linkUkernelBitcode != DEFAULT_LINK_UKERNEL_BITCODE)
+    addBool("link_ukernel_bitcode", linkUkernelBitcode);
 }
 
 std::optional<LLVMTarget>
@@ -266,7 +290,7 @@
     }
     return {};
   };
-  auto getBoolValue = [&](StringRef name, bool fallback) -> bool {
+  auto getBool = [&](StringRef name, bool fallback) -> bool {
     Attribute attr = config.get(name);
     if (auto battr = llvm::dyn_cast_if_present<BoolAttr>(attr)) {
       return battr.getValue();
@@ -277,6 +301,17 @@
     }
     return fallback;
   };
+  auto getInt64 = [&](StringRef name, int64_t fallback) -> int64_t {
+    Attribute attr = config.get(name);
+    if (auto iattr = llvm::dyn_cast_if_present<IntegerAttr>(attr)) {
+      return iattr.getValue().getSExtValue();
+    } else if (attr) {
+      hasFailures = true;
+      emitError(loc) << "executable config '" << name
+                     << "' requires i64 but got " << attr;
+    }
+    return fallback;
+  };
 
   LLVMTarget target;
 
@@ -284,7 +319,7 @@
   auto triple = getOptionalString("target_triple");
   auto cpu = getOptionalString("cpu");
   auto cpuFeatures = getOptionalString("cpu_features");
-  bool linkEmbedded = getBoolValue("link_embedded", DEFAULT_LINK_EMBEDDED);
+  bool linkEmbedded = getBool("link_embedded", DEFAULT_LINK_EMBEDDED);
   if (triple || cpu || cpuFeatures) {
     if (!triple) {
       emitError(loc) << "executable config 'cpu' or 'cpu_features' must be "
@@ -302,9 +337,12 @@
     target.copy(defaultTarget);
   }
 
-  // Loose items.
-  target.debugSymbols = getBoolValue("debug_symbols", DEFAULT_DEBUG_SYMBOLS);
-  target.linkStatic = getBoolValue("link_static", DEFAULT_LINK_STATIC);
+  target.dataLayout = getString("data_layout", DEFAULT_DATA_LAYOUT, false);
+  target.vectorWidthInBytes =
+      getInt64("native_vector_size", DEFAULT_VECTOR_WIDTH_IN_BYTES);
+
+  target.debugSymbols = getBool("debug_symbols", DEFAULT_DEBUG_SYMBOLS);
+  target.linkStatic = getBool("link_static", DEFAULT_LINK_STATIC);
   auto sanitizer = getOptionalString("sanitizer");
   if (sanitizer) {
     if (sanitizer == "none")
@@ -320,13 +358,14 @@
     }
   }
   target.staticLibraryOutput = getString("static_library_output", "", false);
-  target.pipelineTuningOptions.LoopInterleaving = getBoolValue(
+
+  target.pipelineTuningOptions.LoopInterleaving = getBool(
       "loop_interleaving", target.pipelineTuningOptions.LoopInterleaving);
-  target.pipelineTuningOptions.LoopVectorization = getBoolValue(
+  target.pipelineTuningOptions.LoopVectorization = getBool(
       "loop_vectorization", target.pipelineTuningOptions.LoopVectorization);
-  target.pipelineTuningOptions.LoopUnrolling = getBoolValue(
-      "loop_unrolling", target.pipelineTuningOptions.LoopUnrolling);
-  target.pipelineTuningOptions.SLPVectorization = getBoolValue(
+  target.pipelineTuningOptions.LoopUnrolling =
+      getBool("loop_unrolling", target.pipelineTuningOptions.LoopUnrolling);
+  target.pipelineTuningOptions.SLPVectorization = getBool(
       "slp_vectorization", target.pipelineTuningOptions.SLPVectorization);
   auto targetAbi = getOptionalString("target_abi");
   if (targetAbi)
@@ -345,12 +384,72 @@
     }
   }
 
+  target.ukernels = getString("ukernels", target.ukernels, false);
+  target.linkUkernelBitcode =
+      getBool("link_ukernel_bitcode", target.linkUkernelBitcode);
+
   if (hasFailures) {
     return {};
   }
+  target.populateDefaultsFromTargetMachine();
   return target;
 }
 
+void LLVMTarget::populateDefaultsFromTargetMachine() {
+  // We may need the target machine for certain default values.
+  std::unique_ptr<llvm::TargetMachine> cachedTargetMachine;
+  auto getTargetMachine = [&]() {
+    if (!cachedTargetMachine) {
+      cachedTargetMachine = createTargetMachine(*this);
+      // TODO(#13988): proper error propagation. This is a common user scenario.
+      assert(cachedTargetMachine && "createTargetMachine failed");
+    }
+    return cachedTargetMachine.get();
+  };
+
+  if (dataLayout.empty()) {
+    auto targetDataLayout = getTargetMachine()->createDataLayout();
+    dataLayout = targetDataLayout.getStringRepresentation();
+  }
+
+  if (vectorWidthInBytes == DEFAULT_VECTOR_WIDTH_IN_BYTES) {
+    auto targetMachine = getTargetMachine();
+    auto targetFeatures = targetMachine->getTargetFeatureString();
+
+    // The only way to get the real TTI is to create a function using it.
+    // LLVM's TargetMachine and related APIs are terrible. Absolutely yuck.
+    // Note that we use the data layout set above to either what the user
+    // specified or what the target machine returned.
+    //
+    // If anyone comes across this: it'd be great if getTargetTransformInfo
+    // could be called without requiring a function.
+    llvm::LLVMContext llvmContext;
+    auto llvmModule =
+        std::make_unique<llvm::Module>("dummy_module", llvmContext);
+    llvmModule->setDataLayout(dataLayout);
+    llvm::Function *dummyFunc = llvm::Function::Create(
+        llvm::FunctionType::get(llvm::Type::getVoidTy(llvmContext), false),
+        llvm::GlobalValue::ExternalLinkage, "dummy_func", *llvmModule);
+    if (targetFeatures.contains("avx512")) {
+      // Always override the vector with to 512 on systems with avx512.
+      // @dcaballe says:
+      // > in ML the frequency throttling that happens when using 512-bit
+      // > register doesn't have an overall negative impact in performance due
+      // > to the high computational density of the workloads, even on skylake
+      // > where the throttling was really bad
+      dummyFunc->addFnAttr("prefer-vector-width", "512");
+    }
+    auto targetTTI = targetMachine->getTargetTransformInfo(*dummyFunc);
+
+    // Query the vector width from TTI.
+    unsigned ttiVectorWidthInBytes =
+        targetTTI.getRegisterBitWidth(
+            llvm::TargetTransformInfo::RGK_FixedWidthVector) /
+        8;
+    vectorWidthInBytes = ttiVectorWidthInBytes > 1 ? ttiVectorWidthInBytes : 16;
+  }
+}
+
 void LLVMTargetOptions::initializeTargetInvariantFlags() {
   static llvm::cl::opt<std::string> clSystemLinkerPath(
       "iree-llvmcpu-system-linker-path",
@@ -387,32 +486,33 @@
     return {};
   targetOptions.target = *maybeTarget;
   targetOptions.initializeTargetInvariantFlags();
+  targetOptions.target.populateDefaultsFromTargetMachine();
   return targetOptions;
 }
 
-LLVMTargetOptions LLVMTargetOptions::getFromFlags() {
-  LLVMTargetOptions targetOptions;
+// static
+void LLVMTargetOptions::initializeFromFlags(LLVMTargetOptions &targetOptions) {
   targetOptions.initializeTargetInvariantFlags();
 
   // Target parameters.
   static llvm::cl::opt<std::string> clTargetTriple(
       "iree-llvmcpu-target-triple",
-      llvm::cl::desc("LLVM target machine triple"),
+      llvm::cl::desc("LLVM target machine triple."),
       llvm::cl::init(llvm::sys::getProcessTriple()));
   static llvm::cl::opt<std::string> clTargetCPU(
       "iree-llvmcpu-target-cpu",
       llvm::cl::desc(
-          "LLVM target machine CPU; use 'host' for your host native CPU"),
+          "LLVM target machine CPU; use 'host' for your host native CPU."),
       llvm::cl::init("generic"));
   static llvm::cl::opt<std::string> clTargetCPUFeatures(
       "iree-llvmcpu-target-cpu-features",
       llvm::cl::desc("LLVM target machine CPU features; use 'host' for your "
-                     "host native CPU"),
+                     "host native CPU."),
       llvm::cl::init(""));
   static llvm::cl::opt<bool> clLinkEmbedded(
       "iree-llvmcpu-link-embedded",
       llvm::cl::desc("Links binaries into a platform-agnostic ELF to be loaded "
-                     "by the embedded IREE ELF loader"),
+                     "by the embedded IREE ELF loader."),
       llvm::cl::init(LLVMTarget::DEFAULT_LINK_EMBEDDED));
   std::optional<LLVMTarget> maybeTarget =
       LLVMTarget::create(clTargetTriple, clTargetCPU, clTargetCPUFeatures,
@@ -476,6 +576,18 @@
                      "Hardware floating-point instructions")));
   target.llvmTargetOptions.FloatABIType = clTargetFloatABI;
 
+  static llvm::cl::opt<std::string> clTargetDataLayout(
+      "iree-llvmcpu-target-data-layout",
+      llvm::cl::desc("LLVM target machine data layout override."),
+      llvm::cl::init(""));
+  target.dataLayout = clTargetDataLayout;
+  static llvm::cl::opt<unsigned> clTargetVectorWidthInBytes(
+      "iree-llvmcpu-target-vector-width-in-bytes",
+      llvm::cl::desc("Overrides the native vector register width (in bytes) of "
+                     "the target."),
+      llvm::cl::init(0));
+  target.vectorWidthInBytes = clTargetVectorWidthInBytes;
+
   static llvm::cl::opt<bool> clDebugSymbols(
       "iree-llvmcpu-debug-symbols",
       llvm::cl::desc("Generate and embed debug information (DWARF, PDB, etc)"),
@@ -499,6 +611,20 @@
       llvm::cl::init(target.staticLibraryOutput));
   target.staticLibraryOutput = clStaticLibraryOutputPath;
 
+  static llvm::cl::opt<std::string> clEnableUkernels(
+      "iree-llvmcpu-enable-ukernels",
+      llvm::cl::desc("Enables ukernels in the llvmcpu backend. May be "
+                     "`default`, `none`, `all`, or a comma-separated list of "
+                     "specific unprefixed ukernels to enable, e.g. `mmt4d`."),
+      llvm::cl::init("default"));
+  target.ukernels = clEnableUkernels;
+  static llvm::cl::opt<bool> clLinkUKernelBitcode(
+      "iree-llvmcpu-link-ukernel-bitcode",
+      llvm::cl::desc(
+          "Link ukernel bitcode libraries into generated executables"),
+      llvm::cl::init(target.linkUkernelBitcode));
+  target.linkUkernelBitcode = clLinkUKernelBitcode;
+
   static llvm::cl::opt<bool> clListTargets(
       "iree-llvmcpu-list-targets",
       llvm::cl::desc("Lists all registered targets that the LLVM backend can "
@@ -508,8 +634,36 @@
         llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs());
         exit(0);
       }));
+}
 
+// static
+void LLVMTargetOptions::registerFlags() {
+  LLVMTargetOptions targetOptions;
+  initializeFromFlags(targetOptions);
+}
+
+// static
+LLVMTargetOptions LLVMTargetOptions::getFromFlags() {
+  LLVMTargetOptions targetOptions;
+  initializeFromFlags(targetOptions);
+  targetOptions.target.populateDefaultsFromTargetMachine();
   return targetOptions;
 }
 
+std::unique_ptr<llvm::TargetMachine>
+createTargetMachine(const LLVMTarget &target) {
+  std::string errorMessage;
+  auto llvmTarget =
+      llvm::TargetRegistry::lookupTarget(target.getTriple(), errorMessage);
+  if (!llvmTarget)
+    return nullptr;
+  std::unique_ptr<llvm::TargetMachine> machine(llvmTarget->createTargetMachine(
+      target.getTriple(), target.getCpu() /* cpu e.g k8 */,
+      target.getCpuFeatures() /* cpu features e.g avx512f */,
+      target.llvmTargetOptions, llvm::Reloc::Model::PIC_, {},
+      target.codeGenOptLevel,
+      /*JIT=*/false));
+  return machine;
+}
+
 } // namespace mlir::iree_compiler::IREE::HAL

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h
index 5dd6e3c..05280a7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h

@@ -29,6 +29,8 @@
 // and linking for an ExecutableVariant. It should not contain any
 // environmental configuration like linker paths, diagnostic aids, etc.
 struct LLVMTarget {
+  static constexpr const char *DEFAULT_DATA_LAYOUT = "";
+  static constexpr int64_t DEFAULT_VECTOR_WIDTH_IN_BYTES = 0;
   static constexpr bool DEFAULT_LINK_EMBEDDED = true;
   static constexpr bool DEFAULT_DEBUG_SYMBOLS = true;
   static constexpr SanitizerKind DEFAULT_SANITIZER_KIND = SanitizerKind::kNone;
@@ -39,6 +41,8 @@
   static constexpr bool DEFAULT_SLP_VECTORIZATION = false;
   static constexpr llvm::FloatABI::ABIType DEFAULT_FLOAT_ABI =
       llvm::FloatABI::ABIType::Hard;
+  static constexpr const char *DEFAULT_ENABLE_UKERNELS = "default";
+  static constexpr bool DEFAULT_LINK_UKERNEL_BITCODE = true;
 
   // Default initialize all fields.
   LLVMTarget();
@@ -47,7 +51,11 @@
     triple = other.triple;
     cpu = other.cpu;
     cpuFeatures = other.cpuFeatures;
+    dataLayout = other.dataLayout;
+    vectorWidthInBytes = other.vectorWidthInBytes;
     linkEmbedded = other.linkEmbedded;
+    ukernels = other.ukernels;
+    linkUkernelBitcode = other.linkUkernelBitcode;
   }
 
   void print(llvm::raw_ostream &os) const;
@@ -73,7 +81,11 @@
   const std::string &getTriple() const { return triple; }
   const std::string &getCpu() const { return cpu; }
   const std::string &getCpuFeatures() const { return cpuFeatures; }
-  bool getLinkEmbedded() const { return linkEmbedded; }
+
+  // Overrides the data layout of the target.
+  std::string dataLayout = DEFAULT_DATA_LAYOUT;
+  // Overrides the vector width (in bytes) of the target.
+  int64_t vectorWidthInBytes = DEFAULT_VECTOR_WIDTH_IN_BYTES;
 
   llvm::PipelineTuningOptions pipelineTuningOptions;
   // Optimization level to be used by the LLVM optimizer (middle-end).
@@ -82,6 +94,8 @@
   llvm::CodeGenOptLevel codeGenOptLevel;
   llvm::TargetOptions llvmTargetOptions;
 
+  bool getLinkEmbedded() const { return linkEmbedded; }
+
   // Include debug information in output files (PDB, DWARF, etc).
   // Though this can be set independently from the optLevel (so -O3 with debug
   // information is valid) it may significantly change the output program
@@ -104,7 +118,17 @@
   // any machine without requiring matching system libraries to be installed.
   bool linkStatic = DEFAULT_LINK_STATIC;
 
+  // Enables ukernels in the generated executables. May be `default`, `none`,
+  // `all`, or a comma-separated list of specific unprefixed ukernels to
+  // enable, e.g. `mmt4d`.
+  std::string ukernels = DEFAULT_ENABLE_UKERNELS;
+
+  // Link built-in ukernel bitcode libraries into generated executables.
+  bool linkUkernelBitcode = DEFAULT_LINK_UKERNEL_BITCODE;
+
 private:
+  void populateDefaultsFromTargetMachine();
+
   std::string triple;
   std::string cpu;
   std::string cpuFeatures;
@@ -113,6 +137,8 @@
   // Note: this is ignored for target machines that do not support the ELF
   // loader, such as WebAssembly.
   bool linkEmbedded = DEFAULT_LINK_EMBEDDED;
+
+  friend struct LLVMTargetOptions;
 };
 
 struct LLVMTargetOptions {
@@ -138,13 +164,22 @@
   // are target invariant.
   static LLVMTargetOptions getHostOptions();
 
-  // Returns LLVMTargetOptions struct intialized with the iree-llvmcpu-* flags.
+  // Ensures that command line flags are registered. Should be called on
+  // startup.
+  static void registerFlags();
+
+  // Returns LLVMTargetOptions struct initialized with the iree-llvmcpu-* flags.
   static LLVMTargetOptions getFromFlags();
 
 private:
+  static void initializeFromFlags(LLVMTargetOptions &targetOptions);
   void initializeTargetInvariantFlags();
 };
 
+// Creates target machine form target options.
+std::unique_ptr<llvm::TargetMachine>
+createTargetMachine(const LLVMTarget &target);
+
 } // namespace mlir::iree_compiler::IREE::HAL
 
 #endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMTARGETOPTIONS_H_

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp
index 819cf63..0fae79f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp

@@ -91,6 +91,35 @@
       llvm::cl::cat(halTargetOptionsCategory));
 }
 
+SmallVector<std::string>
+gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp) {
+  SmallVector<std::string> targetNames;
+  llvm::SmallDenseSet<StringRef> targets;
+  executableOp.walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
+    auto targetName = variantOp.getTarget().getBackend().getValue();
+    if (targets.insert(targetName).second) {
+      targetNames.push_back(targetName.str());
+    }
+  });
+  llvm::stable_sort(targetNames);
+  return targetNames;
+}
+
+SmallVector<std::string> gatherExecutableTargetNames(mlir::ModuleOp moduleOp) {
+  SmallVector<std::string> targetNames;
+  llvm::stable_sort(targetNames);
+  llvm::SmallDenseSet<StringRef> targets;
+  moduleOp.walk([&](IREE::HAL::ExecutableOp executableOp) {
+    executableOp.walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
+      auto targetName = variantOp.getTarget().getBackend().getValue();
+      if (targets.insert(targetName).second) {
+        targetNames.push_back(targetName.str());
+      }
+    });
+  });
+  return targetNames;
+}
+
 void dumpDataToPath(StringRef path, StringRef baseName, StringRef suffix,
                     StringRef extension, StringRef data) {
   auto fileName = (llvm::join_items("_", baseName, suffix) + extension).str();

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h
index 58bdfbb..4efdeb4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h

@@ -123,12 +123,24 @@
 public:
   virtual ~TargetBackend() = default;
 
-  // Returns a name for the backend used to differentiate between other targets.
-  virtual std::string name() const = 0;
+  // Returns the ID of a DeviceTarget that can execute files produced by this
+  // backend. This is used to support the `--iree-hal-target-backends=` flag
+  // and will be removed in the future.
+  virtual std::string getLegacyDefaultDeviceID() const = 0;
 
-  // Returns the name of the runtime device for this backend.
-  // TODO(benvanik): remove this once we can properly specify targets.
-  virtual std::string deviceID() const { return name(); }
+  // Appends zero or more executable targets for a device with the given
+  // ID and configuration using flags/options that control target defaults.
+  virtual void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const = 0;
+
+  // Appends zero or more executable targets for a device with the given
+  // ID and configuration that represents the hosting machine.
+  virtual void getHostExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const {}
 
   // Registers dependent dialects for the TargetBackend.
   // Mirrors the method on mlir::Pass of the same name. A TargetBackend is
@@ -136,21 +148,6 @@
   // Types, Attributes).
   virtual void getDependentDialects(DialectRegistry &registry) const {}
 
-  // Returns the default device this backend targets. This may involve setting
-  // defaults from flags and other environmental sources, and it may be
-  // cross-targeting in a way that is not compatible with the host.
-  virtual IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const = 0;
-
-  // Similar to getDefaultDeviceTarget, but always returns a DeviceTargetAttr
-  // that is configured for the host, regardless of if flags/environment were
-  // configured to cross-target in some way.
-  //
-  virtual std::optional<IREE::HAL::DeviceTargetAttr>
-  getHostDeviceTarget(MLIRContext *context) const {
-    return {};
-  }
-
   // Inserts passes used to configure the `hal.executable.variant` op contents
   // for translation. The pass manager will be nested on `hal.executable` such
   // that the pipeline will only run on executable contents.
@@ -312,6 +309,13 @@
   }
 };
 
+// Returns a sorted uniqued set of target backends used in the executable.
+SmallVector<std::string>
+gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp);
+
+// Returns a sorted uniqued set of target backends used in the entire module.
+SmallVector<std::string> gatherExecutableTargetNames(mlir::ModuleOp moduleOp);
+
 // Dumps binary data to a file formed by joining the given path components:
 //   `path/baseName_suffix[extension]`
 void dumpDataToPath(StringRef path, StringRef baseName, StringRef suffix,

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.cpp
new file mode 100644
index 0000000..8641a33
--- /dev/null
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.cpp

@@ -0,0 +1,15 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Dialect/HAL/Target/TargetDevice.h"
+
+#include "mlir/IR/Dialect.h"
+
+namespace mlir::iree_compiler::IREE::HAL {
+
+// TODO(benvanik): add device options.
+
+} // namespace mlir::iree_compiler::IREE::HAL

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.h
new file mode 100644
index 0000000..357e51f
--- /dev/null
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetDevice.h

@@ -0,0 +1,47 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_DIALECT_HAL_TARGET_TARGETDEVICE_H_
+#define IREE_COMPILER_DIALECT_HAL_TARGET_TARGETDEVICE_H_
+
+#include <optional>
+#include <string>
+
+#include "iree/compiler/Dialect/HAL/IR/HALTypes.h"
+#include "mlir/IR/Dialect.h"
+
+namespace mlir::iree_compiler::IREE::HAL {
+
+class TargetRegistry;
+
+// HAL device target interface.
+class TargetDevice {
+public:
+  virtual ~TargetDevice() = default;
+
+  // Returns the default device this backend targets. This may involve setting
+  // defaults from flags and other environmental sources, and it may be
+  // cross-targeting in a way that is not compatible with the host.
+  virtual IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const = 0;
+
+  // Similar to getDefaultDeviceTarget, but always returns a DeviceTargetAttr
+  // that is configured for the host, regardless of if flags/environment were
+  // configured to cross-target in some way.
+  virtual std::optional<IREE::HAL::DeviceTargetAttr>
+  getHostDeviceTarget(MLIRContext *context,
+                      const TargetRegistry &targetRegistry) const {
+    return {};
+  }
+
+  // TODO(benvanik): pipeline registration for specialization of host code at
+  // various stages.
+};
+
+} // namespace mlir::iree_compiler::IREE::HAL
+
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_TARGETDEVICE_H_

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp
index 2bac06c..f24a3c4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp

@@ -10,63 +10,109 @@
 
 namespace mlir::iree_compiler::IREE::HAL {
 
+//===----------------------------------------------------------------------===//
+// TargetRegistration
+//===----------------------------------------------------------------------===//
+
 // Returns the static registry of translator names to translation functions.
-static TargetBackendRegistry &getMutableTargetRegistry() {
-  static TargetBackendRegistry global;
+static TargetRegistry &getMutableTargetRegistry() {
+  static TargetRegistry global;
   return global;
 }
 
-const TargetBackendRegistry &TargetBackendRegistry::getGlobal() {
-  return getMutableTargetRegistry();
-}
-
-TargetBackendRegistration::TargetBackendRegistration(llvm::StringRef name,
-                                                     CreateTargetBackendFn fn,
-                                                     bool registerStaticGlobal)
-    : initFn(std::move(fn)) {
+TargetBackendRegistration::TargetBackendRegistration(
+    llvm::StringRef name, TargetFactoryFn<TargetBackend> fn,
+    bool registerStaticGlobal)
+    : TargetRegistration<TargetBackend>(std::move(fn)) {
   if (registerStaticGlobal) {
     auto &registry = getMutableTargetRegistry();
-    if (registry.registrations.contains(name)) {
+    if (registry.backendRegistrations.contains(name)) {
       llvm::report_fatal_error(
           "Attempting to overwrite an existing translation backend");
     }
-    assert(initFn && "Attempting to register an empty translation function");
-    registry.registrations[name] = this;
+    assert(initFn &&
+           "Attempting to register an empty backend factory function");
+    registry.backendRegistrations[name] = this;
   }
 }
 
-std::shared_ptr<TargetBackend> TargetBackendRegistration::acquire() {
-  std::call_once(initFlag, [&]() { cachedValue = initFn(); });
-  return cachedValue;
+TargetDeviceRegistration::TargetDeviceRegistration(
+    llvm::StringRef name, TargetFactoryFn<TargetDevice> fn,
+    bool registerStaticGlobal)
+    : TargetRegistration<TargetDevice>(std::move(fn)) {
+  if (registerStaticGlobal) {
+    auto &registry = getMutableTargetRegistry();
+    if (registry.deviceRegistrations.contains(name)) {
+      llvm::report_fatal_error(
+          "Attempting to overwrite an existing target device");
+    }
+    assert(initFn && "Attempting to register an empty device factory function");
+    registry.deviceRegistrations[name] = this;
+  }
 }
 
-void TargetBackendRegistry::mergeFrom(const TargetBackendList &targets) {
-  for (auto &it : targets.entries) {
-    if (registrations.contains(it.first)) {
+//===----------------------------------------------------------------------===//
+// TargetRegistry
+//===----------------------------------------------------------------------===//
+
+const TargetRegistry &TargetRegistry::getGlobal() {
+  return getMutableTargetRegistry();
+}
+
+void TargetRegistry::mergeFrom(const TargetBackendList &targetBackends) {
+  for (auto &it : targetBackends.entries) {
+    if (backendRegistrations.contains(it.first)) {
       llvm::report_fatal_error(
           "Attempting to overwrite an existing translation backend");
     }
     auto registration = std::make_unique<TargetBackendRegistration>(
         it.first, it.second, /*registerStaticGlobal=*/false);
-    registrations[it.first] = registration.get();
-    ownedRegistrations.push_back(std::move(registration));
+    backendRegistrations[it.first] = registration.get();
+    ownedBackendRegistrations.push_back(std::move(registration));
   }
 }
 
-void TargetBackendRegistry::mergeFrom(const TargetBackendRegistry &registry) {
-  for (auto &it : registry.registrations) {
-    if (registrations.contains(it.first())) {
+void TargetRegistry::mergeFrom(const TargetDeviceList &targetDevices) {
+  for (auto &it : targetDevices.entries) {
+    if (deviceRegistrations.contains(it.first)) {
+      llvm::report_fatal_error("Attempting to overwrite an existing device");
+    }
+    auto registration = std::make_unique<TargetDeviceRegistration>(
+        it.first, it.second, /*registerStaticGlobal=*/false);
+    deviceRegistrations[it.first] = registration.get();
+    ownedDeviceRegistrations.push_back(std::move(registration));
+  }
+}
+
+void TargetRegistry::mergeFrom(const TargetRegistry &registry) {
+  for (auto &it : registry.deviceRegistrations) {
+    if (deviceRegistrations.contains(it.first())) {
+      llvm::report_fatal_error("Attempting to overwrite an existing device");
+    }
+    deviceRegistrations[it.first()] = it.second;
+  }
+  for (auto &it : registry.backendRegistrations) {
+    if (backendRegistrations.contains(it.first())) {
       llvm::report_fatal_error(
           "Attempting to overwrite an existing translation backend");
     }
-    registrations[it.first()] = it.second;
+    backendRegistrations[it.first()] = it.second;
   }
 }
 
-std::vector<std::string>
-TargetBackendRegistry::getRegisteredTargetBackends() const {
+std::vector<std::string> TargetRegistry::getRegisteredTargetBackends() const {
   std::vector<std::string> result;
-  for (auto &entry : registrations) {
+  for (auto &entry : backendRegistrations) {
+    result.push_back(entry.getKey().str());
+  }
+  std::sort(result.begin(), result.end(),
+            [](const auto &a, const auto &b) { return a < b; });
+  return result;
+}
+
+std::vector<std::string> TargetRegistry::getRegisteredTargetDevices() const {
+  std::vector<std::string> result;
+  for (auto &entry : deviceRegistrations) {
     result.push_back(entry.getKey().str());
   }
   std::sort(result.begin(), result.end(),
@@ -75,8 +121,18 @@
 }
 
 std::shared_ptr<TargetBackend>
-TargetBackendRegistry::getTargetBackend(StringRef targetName) const {
-  for (auto &entry : registrations) {
+TargetRegistry::getTargetBackend(StringRef targetName) const {
+  for (auto &entry : backendRegistrations) {
+    if (entry.getKey() == targetName) {
+      return entry.getValue()->acquire();
+    }
+  }
+  return {};
+}
+
+std::shared_ptr<TargetDevice>
+TargetRegistry::getTargetDevice(StringRef targetName) const {
+  for (auto &entry : deviceRegistrations) {
     if (entry.getKey() == targetName) {
       return entry.getValue()->acquire();
     }
@@ -85,74 +141,64 @@
 }
 
 SmallVector<std::shared_ptr<TargetBackend>>
-TargetBackendRegistry::getTargetBackends(
-    ArrayRef<std::string> targetNames) const {
-  SmallVector<std::shared_ptr<TargetBackend>> matches;
-  for (auto targetName : targetNames) {
+TargetRegistry::getTargetBackends(ArrayRef<std::string> targetNames) const {
+  SmallVector<std::pair<std::string, std::shared_ptr<TargetBackend>>> matches;
+  for (auto &targetName : targetNames) {
     auto targetBackend = getTargetBackend(targetName);
     if (targetBackend) {
-      matches.push_back(std::move(targetBackend));
+      matches.push_back(std::make_pair(targetName, std::move(targetBackend)));
     }
   }
   // To ensure deterministic builds we sort matches by name.
   std::sort(matches.begin(), matches.end(),
-            [](const auto &a, const auto &b) { return a->name() < b->name(); });
-  return matches;
+            [](const auto &a, const auto &b) { return a.first < b.first; });
+  return llvm::to_vector(llvm::map_range(
+      matches, [](auto match) { return std::move(match.second); }));
 }
 
-SmallVector<std::string>
-gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp) {
-  SmallVector<std::string> targetNames;
-  llvm::SmallDenseSet<StringRef> targets;
-  executableOp.walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
-    auto targetName = variantOp.getTarget().getBackend().getValue();
-    if (targets.insert(targetName).second) {
-      targetNames.push_back(targetName.str());
+SmallVector<std::shared_ptr<TargetDevice>>
+TargetRegistry::getTargetDevices(ArrayRef<std::string> targetNames) const {
+  SmallVector<std::pair<std::string, std::shared_ptr<TargetDevice>>> matches;
+  for (auto &targetName : targetNames) {
+    auto targetDevice = getTargetDevice(targetName);
+    if (targetDevice) {
+      matches.push_back(std::make_pair(targetName, std::move(targetDevice)));
     }
-  });
-  llvm::stable_sort(targetNames);
-  return targetNames;
-}
-
-SmallVector<std::string> gatherExecutableTargetNames(mlir::ModuleOp moduleOp) {
-  SmallVector<std::string> targetNames;
-  llvm::stable_sort(targetNames);
-  llvm::SmallDenseSet<StringRef> targets;
-  moduleOp.walk([&](IREE::HAL::ExecutableOp executableOp) {
-    executableOp.walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
-      auto targetName = variantOp.getTarget().getBackend().getValue();
-      if (targets.insert(targetName).second) {
-        targetNames.push_back(targetName.str());
-      }
-    });
-  });
-  return targetNames;
+  }
+  // To ensure deterministic builds we sort matches by name.
+  std::sort(matches.begin(), matches.end(),
+            [](const auto &a, const auto &b) { return a.first < b.first; });
+  return llvm::to_vector(llvm::map_range(
+      matches, [](auto match) { return std::move(match.second); }));
 }
 
 } // namespace mlir::iree_compiler::IREE::HAL
 
+//===----------------------------------------------------------------------===//
+// TargetRegistryRef
+//===----------------------------------------------------------------------===//
+
 namespace llvm::cl {
-template class basic_parser<TargetBackendRegistryRef>;
+template class basic_parser<TargetRegistryRef>;
 } // namespace llvm::cl
 
-using TargetBackendRegistryRef = llvm::cl::TargetBackendRegistryRef;
+using TargetRegistryRef = llvm::cl::TargetRegistryRef;
 
 // Return true on error.
-bool llvm::cl::parser<TargetBackendRegistryRef>::parse(
-    Option &O, StringRef ArgName, StringRef Arg,
-    TargetBackendRegistryRef &Val) {
+bool llvm::cl::parser<TargetRegistryRef>::parse(Option &O, StringRef ArgName,
+                                                StringRef Arg,
+                                                TargetRegistryRef &Val) {
   // We ignore Arg here and just use the global registry. We could parse a list
   // of target backends and create a new registry with just that subset but
   // ownership gets tricky.
   if (Arg != "global")
     return true;
-  Val.value =
-      &mlir::iree_compiler::IREE::HAL::TargetBackendRegistry::getGlobal();
+  Val.value = &mlir::iree_compiler::IREE::HAL::TargetRegistry::getGlobal();
   return false;
 }
 
-void llvm::cl::parser<TargetBackendRegistryRef>::printOptionDiff(
-    const Option &O, TargetBackendRegistryRef V, const OptVal &Default,
+void llvm::cl::parser<TargetRegistryRef>::printOptionDiff(
+    const Option &O, TargetRegistryRef V, const OptVal &Default,
     size_t GlobalWidth) const {
   printOptionName(O, GlobalWidth);
   std::string Str = "global";
@@ -160,4 +206,4 @@
   outs().indent(2) << " (default: global)\n";
 }
 
-void llvm::cl::parser<TargetBackendRegistryRef>::anchor() {}
+void llvm::cl::parser<TargetRegistryRef>::anchor() {}

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h
index 07f933d..21d9f65 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h

@@ -12,6 +12,7 @@
 #include <vector>
 
 #include "iree/compiler/Dialect/HAL/Target/TargetBackend.h"
+#include "iree/compiler/Dialect/HAL/Target/TargetDevice.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -19,118 +20,145 @@
 
 namespace mlir::iree_compiler::IREE::HAL {
 
-using CreateTargetBackendFn = std::function<std::shared_ptr<TargetBackend>()>;
+//===----------------------------------------------------------------------===//
+// TargetRegistration
+//===----------------------------------------------------------------------===//
 
-// Registers an executable translation target backend creation function.
-//
-// For example:
-//   llvm-aot-x86_64
-//   llvm-aot-armv8-dotprod
-//   llvm-jit
-//   vulkan-v1.1-low
-//   vulkan-v1.1-high
-class TargetBackendRegistration {
+template <typename T>
+using TargetFactoryFn = std::function<std::shared_ptr<T>()>;
+
+template <typename T>
+class TargetRegistration {
 public:
-  // TODO: Remove the registerStaticGlobal mode once callers are migrated.
-  TargetBackendRegistration(StringRef name, CreateTargetBackendFn fn,
-                            bool registerStaticGlobal = true);
+  TargetRegistration(TargetFactoryFn<T> fn) : initFn(std::move(fn)) {}
+  std::shared_ptr<T> acquire() {
+    std::call_once(initFlag, [&]() { cachedValue = initFn(); });
+    return cachedValue;
+  }
 
-  std::shared_ptr<TargetBackend> acquire();
-
-private:
-  CreateTargetBackendFn initFn;
+protected:
+  TargetFactoryFn<T> initFn;
   std::once_flag initFlag;
-  std::shared_ptr<TargetBackend> cachedValue;
+  std::shared_ptr<T> cachedValue;
+};
+class TargetBackendRegistration : public TargetRegistration<TargetBackend> {
+public:
+  // TODO(#15468): remove the registerStaticGlobal mode once callers are
+  // migrated and move the constructor to the template type.
+  TargetBackendRegistration(StringRef name, TargetFactoryFn<TargetBackend> fn,
+                            bool registerStaticGlobal = true);
+};
+class TargetDeviceRegistration : public TargetRegistration<TargetDevice> {
+public:
+  // TODO(#15468): remove the registerStaticGlobal mode once callers are
+  // migrated and move the constructor to the template type.
+  TargetDeviceRegistration(StringRef name, TargetFactoryFn<TargetDevice> fn,
+                           bool registerStaticGlobal = true);
 };
 
-// A registry of target
-class TargetBackendList {
+template <typename T>
+class TargetFactoryList {
 public:
-  void add(llvm::StringRef name, CreateTargetBackendFn fn) {
-    entries.push_back(std::make_pair(name, fn));
+  void add(llvm::StringRef name, TargetFactoryFn<T> fn) {
+    entries.push_back(std::make_pair(name.str(), fn));
   }
 
 private:
-  llvm::SmallVector<std::pair<llvm::StringRef, CreateTargetBackendFn>> entries;
-  friend class TargetBackendRegistry;
+  llvm::SmallVector<std::pair<std::string, TargetFactoryFn<T>>> entries;
+  friend class TargetRegistry;
 };
+class TargetBackendList : public TargetFactoryList<TargetBackend> {};
+class TargetDeviceList : public TargetFactoryList<TargetDevice> {};
 
-// A concrete target backend registry.
-class TargetBackendRegistry {
+//===----------------------------------------------------------------------===//
+// TargetRegistry
+//===----------------------------------------------------------------------===//
+
+// A concrete target registry.
+class TargetRegistry {
 public:
-  // Merge from a list of of targets. The registry will own the registration
-  // entries.
-  void mergeFrom(const TargetBackendList &targets);
+  // Returns the read-only global registry.
+  // This is used by passes which depend on it from their default constructor.
+  static const TargetRegistry &getGlobal();
+
+  // Merge from a list of of target backends.
+  // The receiving registry will own the registration entries.
+  void mergeFrom(const TargetBackendList &targetBackends);
+  // Merge from a list of of target devices.
+  // The receiving registry will own the registration entries.
+  void mergeFrom(const TargetDeviceList &targetDevices);
+
   // Initialize from an existing registry. This registry will not own the
   // backing registration entries. The source registry must remain live for the
   // life of this.
-  void mergeFrom(const TargetBackendRegistry &registry);
-
-  // Returns the read-only global registry. This is used by passes which depend
-  // on it from their default constructor.
-  static const TargetBackendRegistry &getGlobal();
+  void mergeFrom(const TargetRegistry &registry);
 
   // Returns a list of registered target backends.
   std::vector<std::string> getRegisteredTargetBackends() const;
+  // Returns a list of registered target devices.
+  std::vector<std::string> getRegisteredTargetDevices() const;
 
   // Returns the target backend with the given name.
   std::shared_ptr<TargetBackend> getTargetBackend(StringRef targetName) const;
+  // Returns the target device with the given name.
+  std::shared_ptr<TargetDevice> getTargetDevice(StringRef targetName) const;
 
   // Returns one backend per entry in |targetNames|.
   SmallVector<std::shared_ptr<TargetBackend>>
   getTargetBackends(ArrayRef<std::string> targetNames) const;
+  // Returns one device per entry in |targetNames|.
+  SmallVector<std::shared_ptr<TargetDevice>>
+  getTargetDevices(ArrayRef<std::string> targetNames) const;
 
 private:
-  llvm::StringMap<TargetBackendRegistration *> registrations;
+  llvm::StringMap<TargetBackendRegistration *> backendRegistrations;
   llvm::SmallVector<std::unique_ptr<TargetBackendRegistration>>
-      ownedRegistrations;
+      ownedBackendRegistrations;
+  llvm::StringMap<TargetDeviceRegistration *> deviceRegistrations;
+  llvm::SmallVector<std::unique_ptr<TargetDeviceRegistration>>
+      ownedDeviceRegistrations;
 
+  // TODO(#15468): remove this when not used by LLVMCPU/VulkanSPIRV.
   friend class TargetBackendRegistration;
+  friend class TargetDeviceRegistration;
 };
 
-// Returns a sorted uniqued set of target backends used in the executable.
-SmallVector<std::string>
-gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp);
-
-// Returns a sorted uniqued set of target backends used in the entire module.
-SmallVector<std::string> gatherExecutableTargetNames(mlir::ModuleOp moduleOp);
-
 } // namespace mlir::iree_compiler::IREE::HAL
 
+//===----------------------------------------------------------------------===//
+// TargetRegistryRef
+//===----------------------------------------------------------------------===//
+
 namespace llvm::cl {
 
-struct TargetBackendRegistryRef {
-  const mlir::iree_compiler::IREE::HAL::TargetBackendRegistry *value =
-      &mlir::iree_compiler::IREE::HAL::TargetBackendRegistry::getGlobal();
-  TargetBackendRegistryRef() = default;
-  TargetBackendRegistryRef(
-      const mlir::iree_compiler::IREE::HAL::TargetBackendRegistry &value)
+struct TargetRegistryRef {
+  const mlir::iree_compiler::IREE::HAL::TargetRegistry *value =
+      &mlir::iree_compiler::IREE::HAL::TargetRegistry::getGlobal();
+  TargetRegistryRef() = default;
+  TargetRegistryRef(const mlir::iree_compiler::IREE::HAL::TargetRegistry &value)
       : value(&value) {}
-  TargetBackendRegistryRef(
-      const mlir::iree_compiler::IREE::HAL::TargetBackendRegistry *value)
+  TargetRegistryRef(const mlir::iree_compiler::IREE::HAL::TargetRegistry *value)
       : value(value) {}
   operator bool() const noexcept {
     return value->getRegisteredTargetBackends() !=
-           mlir::iree_compiler::IREE::HAL::TargetBackendRegistry::getGlobal()
+           mlir::iree_compiler::IREE::HAL::TargetRegistry::getGlobal()
                .getRegisteredTargetBackends();
   }
-  const mlir::iree_compiler::IREE::HAL::TargetBackendRegistry *
-  operator->() const {
+  const mlir::iree_compiler::IREE::HAL::TargetRegistry *operator->() const {
     return value;
   }
 };
 
-extern template class basic_parser<TargetBackendRegistryRef>;
+extern template class basic_parser<TargetRegistryRef>;
 
 template <>
-class parser<TargetBackendRegistryRef>
-    : public basic_parser<TargetBackendRegistryRef> {
+class parser<TargetRegistryRef> : public basic_parser<TargetRegistryRef> {
 public:
   parser(Option &O) : basic_parser(O) {}
   bool parse(Option &O, StringRef ArgName, StringRef Arg,
-             TargetBackendRegistryRef &Val);
-  StringRef getValueName() const override { return "target backend registry"; }
-  void printOptionDiff(const Option &O, TargetBackendRegistryRef V,
+             TargetRegistryRef &Val);
+  StringRef getValueName() const override { return "target registry"; }
+  void printOptionDiff(const Option &O, TargetRegistryRef V,
                        const OptVal &Default, size_t GlobalWidth) const;
   void anchor() override;
 };

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
index e99e7bd..11aff8e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp

@@ -110,42 +110,83 @@
   return {};
 }
 
-class VulkanSPIRVTargetBackend : public TargetBackend {
+// TODO: VulkanOptions for choosing the Vulkan version and extensions/features.
+class VulkanTargetDevice : public TargetDevice {
 public:
-  VulkanSPIRVTargetBackend(VulkanSPIRVTargetOptions options)
+  VulkanTargetDevice(VulkanSPIRVTargetOptions options)
       : options_(std::move(options)) {}
 
-  // NOTE: we could vary these based on the options such as 'vulkan-v1.1'.
-  std::string name() const override { return "vulkan"; }
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Codegen::IREECodegenDialect, Vulkan::VulkanDialect,
-                    spirv::SPIRVDialect, gpu::GPUDialect>();
-  }
-
   IREE::HAL::DeviceTargetAttr
-  getDefaultDeviceTarget(MLIRContext *context) const override {
+  getDefaultDeviceTarget(MLIRContext *context,
+                         const TargetRegistry &targetRegistry) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
     auto configAttr = b.getDictionaryAttr(configItems);
 
+    SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+    targetRegistry.getTargetBackend("vulkan-spirv")
+        ->getDefaultExecutableTargets(context, "vulkan", configAttr,
+                                      executableTargetAttrs);
+
+    return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("vulkan"),
+                                            configAttr, executableTargetAttrs);
+  }
+
+private:
+  VulkanSPIRVTargetOptions options_;
+};
+
+class VulkanSPIRVTargetBackend : public TargetBackend {
+public:
+  VulkanSPIRVTargetBackend(VulkanSPIRVTargetOptions options)
+      : options_(std::move(options)) {}
+
+  std::string getLegacyDefaultDeviceID() const override { return "vulkan"; }
+
+  void getDefaultExecutableTargets(
+      MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
+      SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
+      const override {
     // Select SPIR-V environments to compile for.
-    SmallVector<IREE::HAL::ExecutableTargetAttr> targetAttrs;
     for (std::string targetTripleOrEnv : options_.targetTriplesAndEnvs) {
-      targetAttrs.push_back(getExecutableTarget(
+      executableTargetAttrs.push_back(getExecutableTarget(
           context, getSPIRVTargetEnv(targetTripleOrEnv, context),
           options_.indirectBindings));
     }
+
     // If no environment specified, populate with a minimal target.
-    if (targetAttrs.empty()) {
-      targetAttrs.push_back(getExecutableTarget(
+    if (executableTargetAttrs.empty()) {
+      executableTargetAttrs.push_back(getExecutableTarget(
           context, getSPIRVTargetEnv("unknown-unknown-unknown", context),
           options_.indirectBindings));
     }
+  }
 
-    return IREE::HAL::DeviceTargetAttr::get(
-        context, b.getStringAttr(deviceID()), configAttr, targetAttrs);
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context, spirv::TargetEnvAttr targetEnv,
+                      bool indirectBindings) const {
+    Builder b(context);
+    SmallVector<NamedAttribute> configItems;
+    auto addConfig = [&](StringRef name, Attribute value) {
+      configItems.emplace_back(b.getStringAttr(name), value);
+    };
+
+    addConfig(spirv::getTargetEnvAttrName(), targetEnv);
+    if (indirectBindings) {
+      addConfig("hal.bindings.indirect", b.getUnitAttr());
+    }
+
+    return IREE::HAL::ExecutableTargetAttr::get(
+        context, b.getStringAttr("vulkan-spirv"),
+        indirectBindings ? b.getStringAttr("vulkan-spirv-fb-ptr")
+                         : b.getStringAttr("vulkan-spirv-fb"),
+        b.getDictionaryAttr(configItems));
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<IREE::Codegen::IREECodegenDialect, Vulkan::VulkanDialect,
+                    spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
   void buildConfigurationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
@@ -401,41 +442,20 @@
   }
 
 private:
-  IREE::HAL::ExecutableTargetAttr
-  getExecutableTarget(MLIRContext *context, spirv::TargetEnvAttr targetEnv,
-                      bool indirectBindings) const {
-    Builder b(context);
-    SmallVector<NamedAttribute> configItems;
-
-    configItems.emplace_back(b.getStringAttr(spirv::getTargetEnvAttrName()),
-                             targetEnv);
-    if (indirectBindings) {
-      configItems.emplace_back(b.getStringAttr("hal.bindings.indirect"),
-                               UnitAttr::get(context));
-    }
-
-    auto configAttr = b.getDictionaryAttr(configItems);
-    return IREE::HAL::ExecutableTargetAttr::get(
-        context, b.getStringAttr("vulkan"),
-        indirectBindings ? b.getStringAttr("vulkan-spirv-fb-ptr")
-                         : b.getStringAttr("vulkan-spirv-fb"),
-        configAttr);
-  }
-
   VulkanSPIRVTargetOptions options_;
 };
 
 void registerVulkanSPIRVTargetBackends(
     std::function<VulkanSPIRVTargetOptions()> queryOptions) {
   getVulkanSPIRVTargetOptionsFromFlags();
-  auto backendFactory = [=]() {
-    return std::make_shared<VulkanSPIRVTargetBackend>(queryOptions());
-  };
   // #hal.device.target<"vulkan", ...
-  static TargetBackendRegistration registration0("vulkan", backendFactory);
+  static TargetDeviceRegistration registration0("vulkan", [=]() {
+    return std::make_shared<VulkanTargetDevice>(queryOptions());
+  });
   // #hal.executable.target<"vulkan-spirv", ...
-  static TargetBackendRegistration registration1("vulkan-spirv",
-                                                 backendFactory);
+  static TargetBackendRegistration registration1("vulkan-spirv", [=]() {
+    return std::make_shared<VulkanSPIRVTargetBackend>(queryOptions());
+  });
 }
 
 } // namespace mlir::iree_compiler::IREE::HAL

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp
index 8bf236e..7e0b5d4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp

@@ -58,36 +58,44 @@
 
     // If no targets are specified we can't do anything - another pass earlier
     // in the pipeline will have had to add the targets.
-    if (targets.empty()) {
+    if (targetBackends.empty()) {
       emitRemark(moduleOp.getLoc())
-          << "no target HAL devices specified during assignment";
+          << "no target HAL target backends specified during assignment";
       return;
     }
 
     llvm::SmallDenseSet<Attribute> targetAttrSet;
     SmallVector<Attribute> targetAttrs;
-    for (const auto &targetName : targets) {
-      auto targetBackend = targetRegistry->getTargetBackend(targetName);
+    for (const auto &targetBackendName : targetBackends) {
+      auto targetBackend = targetRegistry->getTargetBackend(targetBackendName);
       if (!targetBackend) {
         std::string backends;
         llvm::raw_string_ostream os(backends);
-        llvm::interleaveComma(
-            targetRegistry->getTargetBackends(
-                targetRegistry->getRegisteredTargetBackends()),
-            os,
-            [&os](const std::shared_ptr<
-                  mlir::iree_compiler::IREE::HAL::TargetBackend>
-                      b) { os << b->name(); });
+        llvm::interleaveComma(targetRegistry->getRegisteredTargetBackends(), os,
+                              [&os](const std::string &name) { os << name; });
         emitError(moduleOp.getLoc())
-            << "target backend '" << targetName
+            << "target backend '" << targetBackendName
             << "' not registered; registered backends: " << os.str();
         signalPassFailure();
         return;
       }
+      auto targetDeviceName = targetBackend->getLegacyDefaultDeviceID();
+      auto targetDevice = targetRegistry->getTargetDevice(targetDeviceName);
+      if (!targetDevice) {
+        std::string devices;
+        llvm::raw_string_ostream os(devices);
+        llvm::interleaveComma(targetRegistry->getRegisteredTargetDevices(), os,
+                              [&os](const std::string &name) { os << name; });
+        emitError(moduleOp.getLoc())
+            << "target device '" << targetDeviceName
+            << "' not registered; registered devices: " << os.str();
+        signalPassFailure();
+        return;
+      }
 
       // Ask the target backend for its default device specification attribute.
-      auto targetAttr =
-          targetBackend->getDefaultDeviceTarget(moduleOp.getContext());
+      auto targetAttr = targetDevice->getDefaultDeviceTarget(
+          moduleOp.getContext(), *targetRegistry.value);
       if (!targetAttrSet.contains(targetAttr)) {
         targetAttrSet.insert(targetAttr);
         targetAttrs.push_back(targetAttr);

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
index 776b36b..273b2db 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp

@@ -183,9 +183,9 @@
 // --iree-hal-configuration-pipeline
 //===----------------------------------------------------------------------===//
 
-void buildHALConfigurationPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
-    const TargetOptions &targetOptions) {
+void buildHALConfigurationPassPipeline(OpPassManager &passManager,
+                                       const TargetRegistry &targetRegistry,
+                                       const TargetOptions &targetOptions) {
   //----------------------------------------------------------------------------
   // Input cleanup and simplification
   //----------------------------------------------------------------------------
@@ -249,7 +249,7 @@
 //===----------------------------------------------------------------------===//
 
 void buildHALTransformPassPipeline(OpPassManager &passManager,
-                                   const TargetBackendRegistry &targetRegistry,
+                                   const TargetRegistry &targetRegistry,
                                    const TargetOptions &targetOptions,
                                    const TransformOptions &transformOptions,
                                    PipelinePhase compileFrom,
@@ -475,7 +475,7 @@
 }
 
 void buildHALTransformPassPipeline(OpPassManager &passManager,
-                                   const TargetBackendRegistry &targetRegistry,
+                                   const TargetRegistry &targetRegistry,
                                    const TargetOptions &targetOptions,
                                    PipelinePhase compileFrom,
                                    PipelinePhase compileTo) {
@@ -506,18 +506,17 @@
                              "Runs HAL target configuration pipeline.",
                              [](OpPassManager &passManager) {
                                buildHALConfigurationPassPipeline(
-                                   passManager,
-                                   TargetBackendRegistry::getGlobal(),
+                                   passManager, TargetRegistry::getGlobal(),
                                    TargetOptions::FromFlags::get());
                              });
   PassPipelineRegistration<TransformOptions>(
       "iree-hal-transformation-pipeline",
       "Runs the full IREE HAL conversion/lowering pipeline.",
       [](OpPassManager &passManager, const TransformOptions &transformOptions) {
-        buildHALTransformPassPipeline(
-            passManager, TargetBackendRegistry::getGlobal(),
-            TargetOptions::FromFlags::get(), transformOptions,
-            PipelinePhase::Start, PipelinePhase::End);
+        buildHALTransformPassPipeline(passManager, TargetRegistry::getGlobal(),
+                                      TargetOptions::FromFlags::get(),
+                                      transformOptions, PipelinePhase::Start,
+                                      PipelinePhase::End);
       });
 }
 

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h
index 2329baa..54034b2 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h

@@ -39,9 +39,9 @@
 // Adds a set of passes to the given pass manager that run the head of the HAL
 // pipeline to assign devices, materialize interfaces, and translate
 // executables. The host portion of the program is annotated but not modified.
-void buildHALConfigurationPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
-    const TargetOptions &targetOptions);
+void buildHALConfigurationPassPipeline(OpPassManager &passManager,
+                                       const TargetRegistry &targetRegistry,
+                                       const TargetOptions &targetOptions);
 
 // Adds a set of passes to the given pass manager that run the required HAL
 // transforms in the canonical order.
@@ -54,7 +54,7 @@
 //   buildHALTransformPassPipeline & run
 //   <run conversion from HAL to vm/etc>
 void buildHALTransformPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
+    OpPassManager &passManager, const TargetRegistry &targetRegistry,
     const TargetOptions &targetOptions,
     PipelinePhase compileFrom = PipelinePhase::Start,
     PipelinePhase compileTo = PipelinePhase::End);

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td
index d40451c..5ef5ee3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td

@@ -54,7 +54,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
   ];
@@ -69,13 +69,13 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     ListOption<
-      "targets", "targets",
+      "targetBackends", "targetBackends",
       "std::string",
-      "List of devices to assign as targets."
+      "List of target backends to assign as device targets."
     >,
   ];
 }
@@ -212,7 +212,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
   ];
@@ -228,7 +228,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     Option<
@@ -250,7 +250,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
   ];
@@ -267,7 +267,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     Option<
@@ -299,7 +299,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
   ];
@@ -317,7 +317,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     Option<
@@ -353,7 +353,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     Option<
@@ -385,7 +385,7 @@
   let options = [
     Option<
       "targetRegistry", "target-registry",
-      "llvm::cl::TargetBackendRegistryRef", "",
+      "llvm::cl::TargetRegistryRef", "",
       "Target backend registry containing the list of available backends."
     >,
     Option<

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp
index 64a003d..7362b92 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp

@@ -69,28 +69,48 @@
 
     // Verify each target is registered.
     for (auto attr : targetsAttr) {
-      auto targetAttr = llvm::dyn_cast<IREE::HAL::DeviceTargetAttr>(attr);
-      if (!targetAttr) {
+      auto deviceTargetAttr = llvm::dyn_cast<IREE::HAL::DeviceTargetAttr>(attr);
+      if (!deviceTargetAttr) {
         moduleOp.emitError() << "invalid target attr type: " << attr;
         signalPassFailure();
         return;
       }
 
-      auto targetBackend =
-          targetRegistry->getTargetBackend(targetAttr.getDeviceID().getValue());
-      if (!targetBackend) {
+      auto targetDevice = targetRegistry->getTargetDevice(
+          deviceTargetAttr.getDeviceID().getValue());
+      if (!targetDevice) {
         auto diagnostic = moduleOp.emitError();
         diagnostic
-            << "unregistered target backend " << targetAttr.getDeviceID()
+            << "unregistered target device " << deviceTargetAttr.getDeviceID()
             << "; ensure it is linked in to the compiler (available = [ ";
         for (const auto &targetName :
-             targetRegistry->getRegisteredTargetBackends()) {
+             targetRegistry->getRegisteredTargetDevices()) {
           diagnostic << "'" << targetName << "' ";
         }
         diagnostic << "])";
         signalPassFailure();
         return;
       }
+
+      for (auto executableTargetAttr :
+           deviceTargetAttr.getExecutableTargets()) {
+        auto targetBackend = targetRegistry->getTargetBackend(
+            executableTargetAttr.getBackend().getValue());
+        if (!targetBackend) {
+          auto diagnostic = moduleOp.emitError();
+          diagnostic
+              << "unregistered target backend "
+              << executableTargetAttr.getBackend()
+              << "; ensure it is linked in to the compiler (available = [ ";
+          for (const auto &targetName :
+               targetRegistry->getRegisteredTargetBackends()) {
+            diagnostic << "'" << targetName << "' ";
+          }
+          diagnostic << "])";
+          signalPassFailure();
+          return;
+        }
+      }
     }
   }
 };

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/assign_target_devices.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/assign_target_devices.mlir
index 37fb72a..a889c7d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/assign_target_devices.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/assign_target_devices.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices)' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-0
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vmvx})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-1
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vmvx,vmvx-inline})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-2
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vmvx,vmvx})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-EQ
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vmvx})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-1
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vmvx,vmvx-inline})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-2
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vmvx,vmvx})' %s | FileCheck %s --check-prefix=CHECK --check-prefix=TARGET-EQ
 
 // TARGET-1: #device_target_vmvx = #hal.device.target<"vmvx"
 

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
index cfa6152..81f1e1c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir

@@ -21,7 +21,7 @@
 
 // -----
 
-// expected-error@+1 {{unregistered target backend "foo"}}
+// expected-error@+1 {{unregistered target device "foo"}}
 module @module attributes {hal.device.targets = [#hal.device.target<"foo">]} {
   util.func private @func() -> ()
 }

diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/multiple_target_env_conversion.mlir b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/multiple_target_env_conversion.mlir
index 02add52..e51577f 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/multiple_target_env_conversion.mlir
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/multiple_target_env_conversion.mlir

@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' \
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' \
 // RUN:   --iree-vulkan-target-triple=rdna3-7900xtx-windows \
 // RUN:   --iree-vulkan-target-env="#vk.target_env<v1.1, r(120), [VK_KHR_spirv_1_4, VK_KHR_storage_buffer_storage_class], AMD:DiscreteGPU, #vk.caps<maxComputeSharedMemorySize = 16384, maxComputeWorkGroupInvocations = 1024, maxComputeWorkGroupSize = dense<[128, 8, 4]>: vector<3xi32>, subgroupFeatures = 63 : i32, subgroupSize = 4 >>" \
 // RUN:   --iree-vulkan-target-triple=valhall-unknown-android31 \

diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
index 38c4a82..69d1010 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir

@@ -1,12 +1,12 @@
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' %s | FileCheck %s --check-prefix=DEFAULT
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=adreno-a650-android30 %s | FileCheck %s --check-prefix=ADRENO
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=valhall-unknown-android31 %s | FileCheck %s --check-prefix=VALHALL
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=turing-t4-linux %s | FileCheck %s --check-prefix=TURING
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=rdna1-5700xt-windows %s | FileCheck %s --check-prefix=RDNA1
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=rdna3-6900xtx-windows %s | FileCheck %s --check-prefix=RDNA3
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=m1-moltenvk-macos %s | FileCheck %s --check-prefix=M1
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=arc-770-windows %s | FileCheck %s --check-prefix=ARC
-// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=pascal-1080-windows %s | FileCheck %s --check-prefix=PASCAL
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' %s | FileCheck %s --check-prefix=DEFAULT
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=adreno-a650-android30 %s | FileCheck %s --check-prefix=ADRENO
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=valhall-unknown-android31 %s | FileCheck %s --check-prefix=VALHALL
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=turing-t4-linux %s | FileCheck %s --check-prefix=TURING
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=rdna1-5700xt-windows %s | FileCheck %s --check-prefix=RDNA1
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=rdna3-6900xtx-windows %s | FileCheck %s --check-prefix=RDNA3
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=m1-moltenvk-macos %s | FileCheck %s --check-prefix=M1
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=arc-770-windows %s | FileCheck %s --check-prefix=ARC
+// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=vulkan-spirv},iree-hal-transformation-pipeline{serialize-executables=false})' --iree-vulkan-target-triple=pascal-1080-windows %s | FileCheck %s --check-prefix=PASCAL
 
 // TODO(antiagainst): Passing in lenghty strings as command-line options is not
 // optimal. We should consider creating a dedicated test pass to pick up

diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir
index 475f965..5a49b13 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir

@@ -31,7 +31,7 @@
 
 // -----
 
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb">
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">
 #map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
 #map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -70,7 +70,7 @@
 #map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {target_triple = "x86_64-none-elf", cpu_features = "+avx512f"}>
 #device_target_llvm_cpu = #hal.device.target<"llvm-cpu", [#executable_target_embedded_elf_x86_64_]>
-#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb">
+#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">
 #device_target_vulkan = #hal.device.target<"vulkan", [#executable_target_vulkan_spirv_fb]>
 module attributes {hal.device.targets = [#device_target_vulkan, #device_target_llvm_cpu]} {
   util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {

diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp
index 8132ae2..2c11a33 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp

@@ -47,7 +47,7 @@
 //===----------------------------------------------------------------------===//
 
 void buildHALInlineStaticTransformPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
+    OpPassManager &passManager, const TargetRegistry &targetRegistry,
     const TargetOptions &targetOptions) {
   //----------------------------------------------------------------------------
   // Device assignment and interface materialization
@@ -106,7 +106,7 @@
       "Runs the inline HAL dialect transformation pipeline",
       [](OpPassManager &passManager) {
         buildHALInlineStaticTransformPassPipeline(
-            passManager, TargetBackendRegistry::getGlobal(),
+            passManager, TargetRegistry::getGlobal(),
             TargetOptions::FromFlags::get());
       });
 }

diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h
index 81d520c..ed0c8d9 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h

@@ -33,7 +33,7 @@
 //   buildHALInlineTransformPassPipeline & run
 //   <serialize VM module>
 void buildHALInlineStaticTransformPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
+    OpPassManager &passManager, const TargetRegistry &targetRegistry,
     const TargetOptions &targetOptions);
 
 //===----------------------------------------------------------------------===//

diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp
index 91b303a..47f1bcd 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp

@@ -47,7 +47,7 @@
 //===----------------------------------------------------------------------===//
 
 void buildHALInlineDynamicTransformPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
+    OpPassManager &passManager, const TargetRegistry &targetRegistry,
     const TargetOptions &targetOptions) {
   //----------------------------------------------------------------------------
   // Device assignment and interface materialization
@@ -127,7 +127,7 @@
       "Runs the inline HAL executable loader dialect transformation pipeline",
       [](OpPassManager &passManager) {
         buildHALInlineDynamicTransformPassPipeline(
-            passManager, TargetBackendRegistry::getGlobal(),
+            passManager, TargetRegistry::getGlobal(),
             TargetOptions::FromFlags::get());
       });
 }

diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h
index 4ed5161..6f65082 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h

@@ -33,7 +33,7 @@
 //   buildHALInlineDynamicTransformPassPipeline & run
 //   <serialize VM module>
 void buildHALInlineDynamicTransformPassPipeline(
-    OpPassManager &passManager, const TargetBackendRegistry &targetRegistry,
+    OpPassManager &passManager, const TargetRegistry &targetRegistry,
     const TargetOptions &targetOptions);
 
 //===----------------------------------------------------------------------===//

diff --git a/compiler/src/iree/compiler/Pipelines/Pipelines.cpp b/compiler/src/iree/compiler/Pipelines/Pipelines.cpp
index f8ebd27..7dce221 100644
--- a/compiler/src/iree/compiler/Pipelines/Pipelines.cpp
+++ b/compiler/src/iree/compiler/Pipelines/Pipelines.cpp

@@ -23,7 +23,7 @@
 namespace mlir::iree_compiler {
 
 void buildIREEPrecompileTransformPassPipeline(
-    const IREE::HAL::TargetBackendRegistry &targetRegistry,
+    const IREE::HAL::TargetRegistry &targetRegistry,
     BindingOptions bindingOptions, InputDialectOptions inputOptions,
     PreprocessingOptions preprocessingOptions,
     GlobalOptimizationOptions globalOptimizationOptions,
@@ -168,7 +168,7 @@
 }
 
 void buildIREEVMTransformPassPipeline(
-    const IREE::HAL::TargetBackendRegistry &targetRegistry,
+    const IREE::HAL::TargetRegistry &targetRegistry,
     BindingOptions bindingOptions, InputDialectOptions inputOptions,
     PreprocessingOptions preprocessingOptions,
     GlobalOptimizationOptions globalOptimizationOptions,
@@ -303,8 +303,8 @@
   highLevelOptimizations.constEval = false;
 
   buildIREEVMTransformPassPipeline(
-      IREE::HAL::TargetBackendRegistry::getGlobal(),
-      BindingOptions::FromFlags::get(), InputDialectOptions::FromFlags::get(),
+      IREE::HAL::TargetRegistry::getGlobal(), BindingOptions::FromFlags::get(),
+      InputDialectOptions::FromFlags::get(),
       PreprocessingOptions::FromFlags::get(), highLevelOptimizations,
       SchedulingOptions::FromFlags::get(),
       IREE::HAL::TargetOptions::FromFlags::get(),

diff --git a/compiler/src/iree/compiler/Pipelines/Pipelines.h b/compiler/src/iree/compiler/Pipelines/Pipelines.h
index be5d045..da4079c 100644
--- a/compiler/src/iree/compiler/Pipelines/Pipelines.h
+++ b/compiler/src/iree/compiler/Pipelines/Pipelines.h

@@ -85,7 +85,7 @@
 
 // Builds a pass pipeline to perform pre-compilation global optimizations.
 void buildIREEPrecompileTransformPassPipeline(
-    const IREE::HAL::TargetBackendRegistry &targetRegistry,
+    const IREE::HAL::TargetRegistry &targetRegistry,
     BindingOptions bindingOptions, InputDialectOptions inputOptions,
     PreprocessingOptions preprocessingOptions,
     GlobalOptimizationOptions highLevelOptimizationOptions,
@@ -101,7 +101,7 @@
 // If a |runTo| phase is specified the pipeline will stop and output the full
 // IR after the phase completes.
 void buildIREEVMTransformPassPipeline(
-    const IREE::HAL::TargetBackendRegistry &targetRegistry,
+    const IREE::HAL::TargetRegistry &targetRegistry,
     BindingOptions bindingOptions, InputDialectOptions inputOptions,
     PreprocessingOptions preprocessingOptions,
     GlobalOptimizationOptions highLevelOptimizationOptions,

diff --git a/compiler/src/iree/compiler/PluginAPI/Client.h b/compiler/src/iree/compiler/PluginAPI/Client.h
index 7d32fd9..3a9652d 100644
--- a/compiler/src/iree/compiler/PluginAPI/Client.h
+++ b/compiler/src/iree/compiler/PluginAPI/Client.h

@@ -27,6 +27,7 @@
 // bringing full dependencies into the plugin API.
 namespace mlir::iree_compiler::IREE::HAL {
 class TargetBackendList;
+class TargetDeviceList;
 } // namespace mlir::iree_compiler::IREE::HAL
 
 namespace mlir::iree_compiler {
@@ -183,6 +184,10 @@
   virtual void
   populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {}
 
+  // Populates new HAL target devices, if any, into the given list.
+  // Targets will be merged into the plugin session-owned registry.
+  virtual void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {}
+
 protected:
   // Called from registerDialects() prior to initializing the context and
   // prior to onActivate().

diff --git a/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp b/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp
index 79f8039..f2b5628 100644
--- a/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp
+++ b/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp

@@ -200,4 +200,11 @@
   }
 }
 
+void PluginManagerSession::populateHALTargetDevices(
+    IREE::HAL::TargetDeviceList &list) {
+  for (auto *s : initializedSessions) {
+    s->populateHALTargetDevices(list);
+  }
+}
+
 } // namespace mlir::iree_compiler

diff --git a/compiler/src/iree/compiler/PluginAPI/PluginManager.h b/compiler/src/iree/compiler/PluginAPI/PluginManager.h
index 214b929..7d5c4ff 100644
--- a/compiler/src/iree/compiler/PluginAPI/PluginManager.h
+++ b/compiler/src/iree/compiler/PluginAPI/PluginManager.h

@@ -137,6 +137,10 @@
   // plugins.
   void populateHALTargetBackends(IREE::HAL::TargetBackendList &list);
 
+  // Populates the given list of HAL target devices for all initialized
+  // plugins.
+  void populateHALTargetDevices(IREE::HAL::TargetDeviceList &list);
+
 private:
   PluginManagerOptions &options;
   // At construction, uninitialized plugin sessions are created for all

diff --git a/compiler/src/iree/compiler/Tools/init_targets.cc b/compiler/src/iree/compiler/Tools/init_targets.cc
index ce9b0f1..dd4135a 100644
--- a/compiler/src/iree/compiler/Tools/init_targets.cc
+++ b/compiler/src/iree/compiler/Tools/init_targets.cc

@@ -23,7 +23,6 @@
 // need.
 void registerHALTargetBackends() {
   static bool init_once = []() {
-
 #ifdef IREE_HAVE_LLVM_CPU_TARGET
     IREE::HAL::registerLLVMCPUTargetBackends(
         []() { return IREE::HAL::LLVMTargetOptions::getFromFlags(); });

diff --git a/experimental/regression_suite/tests/pregenerated/test_llama2.py b/experimental/regression_suite/tests/pregenerated/test_llama2.py
index a685829..0ff2ca9 100644
--- a/experimental/regression_suite/tests/pregenerated/test_llama2.py
+++ b/experimental/regression_suite/tests/pregenerated/test_llama2.py

@@ -36,7 +36,7 @@
         "rdna3_vulkan",
         flags=COMMON_FLAGS
         + [
-            "--iree-hal-target-backends=vulkan",
+            "--iree-hal-target-backends=vulkan-spirv",
             "--iree-vulkan-target-triple=rdna3-unknown-linux",
         ],
     )
@@ -75,7 +75,7 @@
         "a100_vulkan",
         flags=COMMON_FLAGS
         + [
-            "--iree-hal-target-backends=vulkan",
+            "--iree-hal-target-backends=vulkan-spirv",
             f"--iree-vulkan-target-triple=ampere-a100-linux",
         ],
     )

diff --git a/samples/custom_dispatch/vulkan/shaders/example.mlir b/samples/custom_dispatch/vulkan/shaders/example.mlir
index d9a6b84..d655b98 100644
--- a/samples/custom_dispatch/vulkan/shaders/example.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example.mlir

@@ -13,7 +13,7 @@
 // executable binaries produced and multiple variants with differing formats
 // and compilation options (architectures, etc) can be embedded for runtime
 // selection.
-#spirv_target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#spirv_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<
     #spirv.vce<v1.3, [Shader, GroupNonUniform], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>,
     #spirv.resource_limits<max_compute_workgroup_size = [128, 128, 64], subgroup_size = 64>

diff --git a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir
index 9e62b8d..5cdbcac 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir

@@ -13,7 +13,7 @@
 // executable binaries produced and multiple variants with differing formats
 // and compilation options (architectures, etc) can be embedded for runtime
 // selection.
-#spirv_target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#spirv_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<
     #spirv.vce<v1.3, [Shader, GroupNonUniform], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>,
     #spirv.resource_limits<max_compute_workgroup_size = [128, 128, 64], subgroup_size = 64>

diff --git a/samples/custom_dispatch/vulkan/shaders/example_transform.mlir b/samples/custom_dispatch/vulkan/shaders/example_transform.mlir
index c562a89..3766a30 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_transform.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_transform.mlir

@@ -17,7 +17,7 @@
 // HACK: Currently this must match EXACTLY with the executable target for the
 // custom kernel. For things to be truly portable, we need to be able to compare
 // executable configurations.
-#spirv_target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#spirv_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<
     #spirv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformArithmetic, GroupNonUniformBallot],
                      [SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>,

diff --git a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
index 08b977d..70ad898 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir

@@ -6,7 +6,7 @@
 
 // The configuration used for executable compilation.
 // This specifies the device configurations that support this custom kernel.
-#spirv_target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+#spirv_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<
     #spirv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformArithmetic, GroupNonUniformBallot],
                      [SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>,

diff --git a/samples/transform_dialect/example_module.mlir b/samples/transform_dialect/example_module.mlir
index 22f2927..9ea0f78 100644
--- a/samples/transform_dialect/example_module.mlir
+++ b/samples/transform_dialect/example_module.mlir

@@ -30,14 +30,14 @@
 module attributes {
   hal.device.targets = [
     #hal.device.target<"vulkan", [
-      #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+      #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
         spirv.target_env = #target_env
       }>
     ]>
   ]
 } {
   hal.executable private @example_module_dispatch_0 {
-    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
+    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
       hal.executable.export public @example_module_dispatch_0_generic_80_f32 ordinal(0) layout(
                                    #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device):
@@ -63,7 +63,7 @@
     }
   }
   hal.executable private @example_module_dispatch_1 {
-    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
+    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
       hal.executable.export public @example_module_dispatch_1_matmul_16x16x5_f32 ordinal(0) layout(
                                    #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device):
@@ -87,7 +87,7 @@
     }
   }
   hal.executable private @example_module_dispatch_2 {
-    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
+    hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #target_env}>) {
       hal.executable.export public @example_module_dispatch_2_generic_16x16_f32 ordinal(0) layout(
                                    #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device):

diff --git a/tests/e2e/regression/libm_linking.mlir b/tests/e2e/regression/libm_linking.mlir
index 543ac86..9520ee4 100644
--- a/tests/e2e/regression/libm_linking.mlir
+++ b/tests/e2e/regression/libm_linking.mlir

@@ -1,5 +1,5 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=llvm-cpu},iree-transformation-pipeline)' %s | FileCheck %s
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targets=llvm-cpu},iree-transformation-pipeline)' --iree-llvmcpu-link-embedded=false %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=llvm-cpu},iree-transformation-pipeline)' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetBackends=llvm-cpu},iree-transformation-pipeline)' --iree-llvmcpu-link-embedded=false %s | FileCheck %s
 
 // When lowering to CPU code through LLVM, certain LLVM intrinsics require
 // linking against libm (the standard C library of math functions, `-lm`).
commit	890b070bd94a62dc90203ed0514b212836af46ac	[log] [tgz]
author	Ben Vanik <ben.vanik@gmail.com>	Thu Feb 29 10:28:37 2024 -0800
committer	GitHub <noreply@github.com>	Thu Feb 29 10:28:37 2024 -0800
tree	88f433fdc59301c7b6f77555c1e9b1f073ed206d
parent	b2250d8ff4c81ab51095807944fe5c6809618ae0 [diff]