Cleaning up hal.executable.variant syntax. (#15254)

This will make it possible to add more fields/regions without creating parsing ambiguity.

commit: 20e2112cf149bb342e1d9bfe8f01229a674de1ab [log] [tgz]
author: Ben Vanik <ben.vanik@gmail.com> Fri Oct 20 11:44:37 2023 -0700
committer: GitHub <noreply@github.com> Fri Oct 20 11:44:37 2023 -0700
tree: 589d1e7eb03c0c819068007a7b3e0f6621e2ccdc
parent: a95a28a7f56472b3098edfd31d0adeedc64cb180 [diff]
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir
index d3b37d1..9c474e1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-distribute, cse)))))" %s | FileCheck %s
 
 hal.executable private @add_tensor  {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @add_tensor ordinal(0)
   layout(#hal.pipeline.layout<push_constants = 0,
          sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir
index cfc17da..3485646 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir

@@ -16,7 +16,7 @@
   ]>
 ]>
 hal.executable private @shared_mem_cpy  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @shared_mem_cpy layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 4: index, 1:index]
     } {
@@ -110,7 +110,7 @@
 ]>
 
 hal.executable private @unaligned_shared_memory_copy  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @unaligned_shared_memory_copy layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 8: index, 1:index]
     } {
@@ -166,7 +166,7 @@
 ]>
 
 hal.executable private @zero_dim_shared_memory_copy  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 8: index, 1:index]
     } {
@@ -204,7 +204,7 @@
 ]>
 
 hal.executable private @zero_dim_shared_memory_copy  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 8: index, 1:index]
     } {

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir
index ec91bab..ae8a862 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-tensor-tile, cse)))))" %s | FileCheck %s
 
 hal.executable private @add_tensor  {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @add_tensor ordinal(0)
   layout(#hal.pipeline.layout<push_constants = 0,
          sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -61,7 +61,7 @@
 // -----
 
 hal.executable private @reduction  {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @reduction ordinal(0)
   layout(#hal.pipeline.layout<push_constants = 0,
          sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -121,7 +121,7 @@
 // -----
 
 hal.executable private @reduction_broadcast  {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @reduction_broadcast ordinal(0)
   layout(#hal.pipeline.layout<push_constants = 0,
          sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir
index 0c2eb0e..263d3a2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-fold-affinemin-in-distributed-loops, canonicalize)))))' --split-input-file %s | FileCheck %s
 
 hal.executable public @generic_static {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
     hal.executable.export public @generic_static ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTransposeSharedMem>, workgroup_size = [8 : index, 32 : index, 1 : index]} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %c128 = arith.constant 128 : index

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir
index 9621c77..f3b11f9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir

@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: func.func @dispatch_0()
 hal.executable private @dispatch_0  {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @dispatch_0 layout(#pipeline_layout) attributes {
       workgroup_size = [64: index, 1: index, 1:index]
     } {
@@ -60,7 +60,7 @@
 // CHECK-LABEL: func.func @workgroup_tile_loop()
 #translation = #iree_codegen.translation_info<LLVMGPUDistribute>
 hal.executable private @workgroup_tile_loop  {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @workgroup_tile_loop layout(#pipeline_layout) attributes {
       translation_info = #translation
     } {
@@ -99,7 +99,7 @@
 // CHECK-LABEL: func.func @workgroup_tile_loop_negative()
 #translation = #iree_codegen.translation_info<LLVMGPUDistribute>
 hal.executable private @workgroup_tile_loop_negative  {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @workgroup_tile_loop_negative layout(#pipeline_layout) attributes {
       translation_info = #translation
     } {
@@ -140,7 +140,7 @@
 //       CHECK:   gpu.barrier
 #translation = #iree_codegen.translation_info<LLVMGPUDistribute>
 hal.executable private @both_workgroup_and_workitem  {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @both_workgroup_and_workitem layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [8: index, 2: index, 1: index]
@@ -196,11 +196,9 @@
 
 // -----
 
-
 #config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>
 #device_target_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>]}>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 #map0 = affine_map<()[s0] -> (s0 ceildiv 4)>
 #map1 = affine_map<()[s0] -> (s0 * 4)>
@@ -208,7 +206,7 @@
 #map3 = affine_map<(d0)[s0] -> (d0 + s0)>
 module attributes {hal.device.targets = [#device_target_cpu]} {
   hal.executable private @simple_mul {
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
       hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
         %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
index ea1a691..dbe3c84 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir

@@ -17,7 +17,7 @@
 }>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @matmul_tensors layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3
@@ -116,7 +116,7 @@
 #map1 = affine_map<(d0, d1) -> (d1)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @add {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @add layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2
@@ -190,7 +190,7 @@
 #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @add4D {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @add4D layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 :index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -270,7 +270,7 @@
 #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @add_distribute4D {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @add_distribute4D layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 :index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -391,7 +391,7 @@
 #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @add_distribute4D_zero_tile_size {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @add_distribute4D_zero_tile_size layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 :index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -465,7 +465,7 @@
   target_triple = "aarch64-none-elf"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @batch_matmul_tensors {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @batch_matmul_tensors layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -536,7 +536,7 @@
 #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @preset_config_matmul_tensors {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @preset_config layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -600,7 +600,7 @@
 #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
 #translation = #iree_codegen.translation_info<CPUBufferOpsTileAndVectorize>
 hal.executable public @copy_op {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @copy_op layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3: index, %arg4 : index, %arg5: index, %arg6 : index, %arg7: index, %arg8 : index, %arg9: index, %arg10: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10
@@ -701,7 +701,7 @@
 #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @static_1d_fft_stage2 {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @static_1d_fft_stage2 layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -756,7 +756,7 @@
 #executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @static_3d_fft_stage3 {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @static_3d_fft_stage3 layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -813,7 +813,7 @@
 #map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @outs_fusion {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @outs_fusion_fn layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3
@@ -897,7 +897,7 @@
   target_triple = "x86_64-unknown-linux-gnu"}>
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @conv {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @conv layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index, %arg7 : index, %arg8 : index, %arg9 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9
@@ -987,7 +987,7 @@
   target_triple = "x86_64-unknown-linux-gnu"}>
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @conv_static {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @conv_static layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6
@@ -1062,7 +1062,7 @@
 #map1 = affine_map<(d0, d1) -> (d0, d1)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @generic_static {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @generic_static layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1133,7 +1133,7 @@
   target_triple = "aarch64-none-linux-android30"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @matmul_static {
-  hal.executable.variant public @system_elf_arm_64, target = #executable_target_system_elf_arm_64_ {
+  hal.executable.variant public @system_elf_arm_64 target(#executable_target_system_elf_arm_64_) {
     hal.executable.export public @matmul_static layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1190,7 +1190,7 @@
   target_triple = "aarch64-none-linux-android30"}>
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @restrict_num_workgroups {
-  hal.executable.variant public @system_elf_arm_64, target = #executable_target_system_elf_arm_64_ {
+  hal.executable.variant public @system_elf_arm_64 target(#executable_target_system_elf_arm_64_) {
     hal.executable.export public @restrict_num_workgroups layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1250,7 +1250,7 @@
 #map2 = affine_map<(d0) -> (d0)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @reduction {
-  hal.executable.variant public @reduction, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @reduction target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @reduction ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1320,7 +1320,7 @@
   target_triple = "x86_64-none-elf"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @gemm_unit_N {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @gemm_unit_N ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2
@@ -1392,7 +1392,7 @@
   target_triple = "x86_64-none-elf"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @gemm_unit_M_unit_N {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @gemm_unit_M_unit_N ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1452,7 +1452,7 @@
 #map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @generic_unit_dims {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @generic_unit_dims layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -1528,7 +1528,7 @@
 #map1 = affine_map<(d0) -> ()>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @reduce_to_scalar {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @reduce_to_scalar layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1
@@ -1588,7 +1588,7 @@
 #map = affine_map<() -> ()>
 #translation = #iree_codegen.translation_info<CPUDefault>
 hal.executable private @scalar {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @scalar layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -1645,7 +1645,7 @@
 }>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @rank_reduced_slice {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @rank_reduced_slice layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -1713,7 +1713,7 @@
   target_triple = "x86_64-unknown-linux-gnu"}>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @matmul_interchange {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @matmul_interchange layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3
@@ -1772,7 +1772,7 @@
 // -----
 
 hal.executable private @no_compute {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @no_compute ordinal(0) layout(#hal.pipeline.layout<push_constants = 5, sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4 : index, %arg5 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4, %arg5
@@ -1813,7 +1813,7 @@
 // -----
 
 hal.executable private @tile_multiuse_producer {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf_x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf_x86_64", {}>) {
     hal.executable.export public @tile_multiuse_producer ordinal(0) layout (#hal.pipeline.layout<
       push_constants = 0, sets = [<0, bindings = [
           <0, storage_buffer, ReadOnly>, <1, storage_buffer>, <2, storage_buffer>, <3, storage_buffer>]>]>)
@@ -1910,7 +1910,7 @@
 // -----
 
 hal.executable private @no_tile {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @no_tile ordinal(0) layout(#hal.pipeline.layout<
         push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>, <3, storage_buffer>]>]>)
         attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
@@ -1949,7 +1949,7 @@
 // -----
 
 hal.executable private @pack_lowering {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @gemm_lhs_pack ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0,
             sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -1989,7 +1989,7 @@
 // -----
 
 hal.executable private @pack_lowering {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @gemm_rhs_transpose_pack ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0,
             sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -2029,7 +2029,7 @@
 // -----
 
 hal.executable private @clone_index_computations {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @clone_index_computations ordinal(0) layout(
         #hal.pipeline.layout<push_constants = 4, sets = [
             <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -2092,7 +2092,7 @@
 // -----
 
 hal.executable private @dynamic_unpack {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @dynamic_unpack ordinal(0) layout(
         #hal.pipeline.layout<push_constants = 4, sets = [
             <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -2138,7 +2138,7 @@
 // -----
 
 hal.executable private @dynamic_unpack_dynamic_tile {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export public @dynamic_unpack_dynamic_tile ordinal(0) layout(
         #hal.pipeline.layout<push_constants = 4, sets = [
             <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -2186,7 +2186,7 @@
 // -----
 
 hal.executable private @unpack_elem {
-  hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {}> {
+  hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {}>) {
     hal.executable.export public @unpack_elem ordinal(0) layout(
         #hal.pipeline.layout<push_constants = 0, sets = [
             <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
@@ -2224,7 +2224,7 @@
 // -----
 
 hal.executable private @dynamic_unpack_fusion {
-  hal.executable.variant public @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb", {ukernels = true}> {
+  hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb", {ukernels = true}>) {
     hal.executable.export public @dynamic_unpack_fusion ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<VMVXDefault>} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -2281,7 +2281,7 @@
 // -----
 
 hal.executable private @elem_pack {
-  hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>) {
     hal.executable.export public @elem_pack ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer>, <4, storage_buffer>, <5, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -2345,7 +2345,7 @@
 // -----
 
 hal.executable private @scatter {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @scatter ordinal(0)
     layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
     attributes {translation_info = #iree_codegen.translation_info<LLVMGPUDistribute>, workgroup_size = [1 : index, 1 : index, 1 : index]} {
@@ -2384,7 +2384,7 @@
 
 module attributes {hal.device.targets = [#hal.device.target<"cuda", {executable_targets = [#hal.executable.target<"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}>], legacy_sync}>]} {
   hal.executable private @collapse_workgroups_dispatch_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}> {
+    hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}>) {
       hal.executable.export public @collapse_workgroups_dispatch_dispatch_0_generic_1024x128x16x64 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device):
         %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -2410,7 +2410,7 @@
 }
 
 // CHECKW-LABEL:   hal.executable private @collapse_workgroups_dispatch_dispatch_0 {
-//       CHECKW:           hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+//       CHECKW:           hal.executable.variant public @cuda_nvptx_fb
 //       CHECKW:             hal.executable.export public @collapse_workgroups_dispatch_dispatch_0_generic_1024x128x16x64 ordinal(0) layout(#pipeline_layout) {
 //       CHECKW:             ^bb0(%[[ARG0:.*]]: !hal.device):
 //   CHECKW-DAG:               %[[C2097152:.*]] = arith.constant 2097152 : index
@@ -2436,7 +2436,7 @@
 }>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @llvm, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @llvm target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @matmul_tensor_count_from_dag_root layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/warp_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/test/warp_reduction.mlir
index 9ff81bb..2f7c0af 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/warp_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/warp_reduction.mlir

@@ -8,7 +8,7 @@
   ]>
 ]>
 hal.executable private @simple_reduce  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @simple_reduce layout(#pipeline_layout) attributes {
       workgroup_size = [32 : index, 1 : index, 1 : index]
     }
@@ -89,7 +89,7 @@
   ]>
 ]>
 hal.executable private @reduce_uniform_buffer_offset  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @reduce_uniform_buffer_offset layout(#pipeline_layout) attributes {
       workgroup_size = [32 : index, 1 : index, 1 : index]
     }
@@ -158,7 +158,7 @@
   ]>
 ]>
 hal.executable private @reduce_storage_buffer_offset  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @reduce_storage_buffer_offset layout(#pipeline_layout) attributes {
       workgroup_size = [32 : index, 1 : index, 1 : index]
     }

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir
index 5516df1..93d68db 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir

@@ -12,7 +12,7 @@
 ]>
 
 hal.executable private @foo {
-hal.executable.variant @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {cpu_features = "+dotprod", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android29"}> {
+hal.executable.variant @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {cpu_features = "+dotprod", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android29"}>) {
 hal.executable.export @foo layout(#pipeline_layout)
 builtin.module attributes {llvm.data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", llvm.target_triple = "aarch64-none-linux-android29"} {
 

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir
index a0dccff..8077651 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir

@@ -15,7 +15,7 @@
 #map = affine_map<()[s0] -> (s0 ceildiv 2)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @apply_scale_no_vector_feature {
-  hal.executable.variant public @embedded_elf_riscv_64, target = #executable_target_embedded_elf_riscv_64_ {
+  hal.executable.variant public @embedded_elf_riscv_64 target(#executable_target_embedded_elf_riscv_64_) {
     hal.executable.export public @apply_scale_no_vector_feature ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %c1 = arith.constant 1 : index
@@ -63,7 +63,7 @@
 #map = affine_map<()[s0] -> (s0 ceildiv 2)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @apply_scale_v {
-  hal.executable.variant public @embedded_elf_riscv_64, target = #executable_target_embedded_elf_riscv_64_ {
+  hal.executable.variant public @embedded_elf_riscv_64 target(#executable_target_embedded_elf_riscv_64_) {
     hal.executable.export public @apply_scale_v ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %c1 = arith.constant 1 : index
@@ -109,7 +109,7 @@
 #map = affine_map<()[s0] -> (s0 ceildiv 2)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @apply_scale_zve64x {
-  hal.executable.variant public @embedded_elf_riscv_64, target = #executable_target_embedded_elf_riscv_64_ {
+  hal.executable.variant public @embedded_elf_riscv_64 target(#executable_target_embedded_elf_riscv_64_) {
     hal.executable.export public @apply_scale_zve64x ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %c1 = arith.constant 1 : index
@@ -155,7 +155,7 @@
 #map = affine_map<()[s0] -> (s0 ceildiv 2)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @apply_scale_zve32x {
-  hal.executable.variant public @embedded_elf_riscv_64, target = #executable_target_embedded_elf_riscv_64_ {
+  hal.executable.variant public @embedded_elf_riscv_64 target(#executable_target_embedded_elf_riscv_64_) {
     hal.executable.export public @apply_scale_zve32x ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %c1 = arith.constant 1 : index
@@ -208,7 +208,7 @@
 #map = affine_map<()[s0] -> (s0 ceildiv 2)>
 #translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 hal.executable private @apply_scale_zve32f {
-  hal.executable.variant public @embedded_elf_riscv_64, target = #executable_target_embedded_elf_riscv_64_ {
+  hal.executable.variant public @embedded_elf_riscv_64 target(#executable_target_embedded_elf_riscv_64_) {
     hal.executable.export public @apply_scale_zve32f ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_constant_ordinals.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_constant_ordinals.mlir
index 1f0bd6d..ee6dd14 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_constant_ordinals.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_constant_ordinals.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-assign-constant-ordinals)))" --split-input-file %s | FileCheck %s
 
 hal.executable private @executable {
-  hal.executable.variant public @variant, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> {
+  hal.executable.variant public @variant target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">) {
     hal.executable.constant.block(%device: !hal.device) -> i32 as "foo" {
       %c0 = arith.constant 0 : i32
       hal.return %c0 : i32

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_import_ordinals.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_import_ordinals.mlir
index cf87fc9..ab37308 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_import_ordinals.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/assign_import_ordinals.mlir

@@ -6,7 +6,7 @@
 hal.executable private @executable {
   // CHECK: hal.executable.variant
   // CHECK-SAME: hal.executable.imports = {{.+}}["bar", true], ["foo", false]{{.+}}
-  hal.executable.variant public @variant, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> {
+  hal.executable.variant public @variant target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">) {
     builtin.module {
       // CHECK: llvm.mlir.global internal constant @__import_ordinal_foo_a(1 : i32)
       llvm.mlir.global internal @__import_ordinal_foo_a() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
index d89b0a2..e7675ec 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' --split-input-file %s | FileCheck %s
 
 hal.executable private @aligned_generic_pack {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @aligned_generic_pack ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
       %c1 = arith.constant 1 : index
@@ -46,7 +46,7 @@
 // -----
 
 hal.executable private @aligned_unpack_generic {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @aligned_unpack_generic ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %c1 = arith.constant 1 : index
@@ -97,12 +97,12 @@
   ]>
 ]>
 hal.executable private @unaligned_pack  {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
     cpu_features = "+avx512f",
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 64 : index,
     target_triple = "x86_64-none-elf"
-  }> {
+  }>) {
   hal.executable.export public @unaligned_pack layout(#pipeline_layout) {
   ^bb0(%arg0: !hal.device):
     %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir
index 14555d7..70237d8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -39,7 +39,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -68,7 +68,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -97,7 +97,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -126,7 +126,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -157,7 +157,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -186,7 +186,7 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {
@@ -216,7 +216,7 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_2d_nhwc_hwc {
-  hal.executable.variant @llvm, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}> {
+  hal.executable.variant @llvm target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {}>) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {translation_info = #translation}
     builtin.module {
       func.func @illegal() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir
index bbae20b..f3fa654 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_aarch64_launch_configuration.mlir

@@ -9,11 +9,11 @@
   ]>
 ]>
 hal.executable private @matmul_tensors  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-arm_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @matmul_tensors layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensors() {
@@ -62,12 +62,12 @@
   ]>
 ]>
 hal.executable private @matmul_tensors_sve  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-arm_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     cpu_features = "+sve",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @matmul_tensors layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensors() {
@@ -115,11 +115,11 @@
   ]>
 ]>
 hal.executable private @batch_matmul_tensors {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-arm_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @batch_matmul_tensors layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_tensors() {
@@ -166,11 +166,11 @@
   ]>
 ]>
 hal.executable private @matmul_static {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
     hal.executable.export public @matmul_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_static() {
@@ -214,11 +214,11 @@
   ]>
 ]>
 hal.executable private @conv_static {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
     hal.executable.export public @conv_static layout(#pipeline_layout)
     builtin.module {
       func.func @conv_static() {
@@ -255,11 +255,11 @@
   ]>
 ]>
 hal.executable private @restrict_num_workgroups {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
     hal.executable.export public @restrict_num_workgroups layout(#pipeline_layout)
     builtin.module {
       func.func @restrict_num_workgroups() {
@@ -304,11 +304,11 @@
   ]>
 ]>
 hal.executable private @matmul_aarch_i8_i8_i32_static  {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
   hal.executable.export public @matmul_aarch_i8_i8_i32_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_aarch_i8_i8_i32_static() {
@@ -346,11 +346,11 @@
   ]>
 ]>
 hal.executable private @matmul_aarch_i8_i8_i32_dynamic  {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
   hal.executable.export public @matmul_aarch_i8_i8_i32_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_aarch_i8_i8_i32_dynamic() {
@@ -395,11 +395,11 @@
   ]>
 ]>
 hal.executable private @pack  {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
   hal.executable.export public @pack layout(#pipeline_layout)
     builtin.module {
       func.func @pack() {
@@ -432,11 +432,11 @@
   ]>
 ]>
 hal.executable private @unpack_outer_dynamic  {
-  hal.executable.variant public @system_elf_arm_64, target = <"llvm-cpu", "system-elf-arm_64", {
+  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {
     data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-linux-android30"
-  }> {
+  }>) {
   hal.executable.export public @unpack_outer_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @unpack_outer_dynamic() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_configuration_without_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_configuration_without_distribution.mlir
index 9134056..c87a942 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_configuration_without_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_configuration_without_distribution.mlir

@@ -8,13 +8,13 @@
   ]>
 ]>
 hal.executable private @matmul_static  {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu",
     "embedded-elf-x86_64", {
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
       native_vector_size = 16 : index,
       target_triple = "x86_64-none-elf"
-    }> {
+    }>) {
     hal.executable.export public @matmul_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_static() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir
index 5378fb4..aae2c8c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_riscv_launch_configuration.mlir

@@ -8,14 +8,14 @@
   ]>
 ]>
 hal.executable private @matmul_riscv  {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu",
     "embedded-elf-riscv_32", {
       cpu_features = "+m,+f",
       data_layout = "e-m:e-p:32:32-i64:64-n32-S128",
       native_vector_size = 16 : index,
       target_triple = "riscv32-none-elf"
-    }> {
+    }>) {
     hal.executable.export public @matmul_riscv layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_riscv() {
@@ -56,14 +56,14 @@
   ]>
 ]>
 hal.executable private @thin_depthwise_conv_static {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu",
     "embedded-elf-riscv_32", {
       cpu_features = "+m,+f",
       data_layout = "e-m:e-p:32:32-i64:64-n32-S128",
       native_vector_size = 16 : index,
       target_triple = "riscv32-none-elf"
-    }> {
+    }>) {
     hal.executable.export public @thin_depthwise_conv_static layout(#pipeline_layout)
     builtin.module {
       func.func @thin_depthwise_conv_static() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_vmvx_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_vmvx_launch_configuration.mlir
index dd2696a..4517b55 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_vmvx_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_vmvx_launch_configuration.mlir

@@ -8,7 +8,7 @@
   ]>
 ]>
 hal.executable private @matmul_static  {
-  hal.executable.variant @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export public @matmul_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_static() {
@@ -48,7 +48,7 @@
   ]>
 ]>
 hal.executable @copy_op_dynamic {
-  hal.executable.variant @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @copy_op_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @copy_op_dynamic() {
@@ -89,7 +89,7 @@
   ]>
 ]>
 hal.executable private @static_1d_fft_stage2  {
-  hal.executable.variant @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @static_1d_fft_stage2 layout(#pipeline_layout)
     builtin.module {
       func.func @static_1d_fft_stage2() {
@@ -130,7 +130,7 @@
   ]>
 ]>
 hal.executable @fusion_quant_matmul_generic {
-  hal.executable.variant @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @fusion_quant_matmul_generic layout(#pipeline_layout)
     builtin.module {
       func.func @fusion_quant_matmul_generic() {
@@ -194,7 +194,7 @@
   ]>
 ]>
 hal.executable private @unpack_outer_dynamic  {
-  hal.executable.variant @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
   hal.executable.export public @unpack_outer_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @unpack_outer_dynamic() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
index c1eea9b..deac2a0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir

@@ -8,11 +8,11 @@
   ]>
 ]>
 hal.executable private @matvec_static  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @matvec_static layout(#pipeline_layout)
     builtin.module {
       func.func @matvec_static() {
@@ -50,11 +50,11 @@
   ]>
 ]>
 hal.executable private @matvec_dynamic  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @matvec_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @matvec_dynamic() {
@@ -99,11 +99,11 @@
   ]>
 ]>
 hal.executable private @dot_static  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @dot_static layout(#pipeline_layout)
     builtin.module {
       func.func @dot_static() {
@@ -140,11 +140,11 @@
   ]>
 ]>
 hal.executable private @dot_dynamic  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @dot_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @dot_dynamic() {
@@ -185,11 +185,11 @@
   ]>
 ]>
 hal.executable private @dynamic_add {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @dynamic_add layout(#pipeline_layout)
     builtin.module {
       func.func @dynamic_add() {
@@ -237,11 +237,11 @@
   ]>
 ]>
 hal.executable private @add4D  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @add4D layout(#pipeline_layout)
     builtin.module {
       func.func @add4D() {
@@ -293,11 +293,11 @@
   ]>
 ]>
 hal.executable private @add_static {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @add_static layout(#pipeline_layout)
     builtin.module {
       func.func @add_static() {
@@ -337,11 +337,11 @@
   ]>
 ]>
 hal.executable private @preset_config_matmul_tensors  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @preset_config layout(#pipeline_layout)
     builtin.module {
       func.func @preset_config() {
@@ -387,11 +387,11 @@
   ]>
 ]>
 hal.executable private @matmul_partially_pad  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @matmul_partially_pad layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_partially_pad() {
@@ -435,11 +435,11 @@
   ]>
 ]>
 hal.executable @copy_op_dynamic {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @copy_op_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @copy_op_dynamic() {
@@ -481,11 +481,11 @@
   ]>
 ]>
 hal.executable private @static_1d_fft_stage2  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @static_1d_fft_stage2 layout(#pipeline_layout)
     builtin.module {
       func.func @static_1d_fft_stage2() {
@@ -523,11 +523,11 @@
   ]>
 ]>
 hal.executable private @static_3d_fft_stage3  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @static_3d_fft_stage3 layout(#pipeline_layout)
     builtin.module {
       func.func @static_3d_fft_stage3() {
@@ -565,11 +565,11 @@
   ]>
 ]>
 hal.executable private @outs_fusion {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @outs_fusion_fn layout(#pipeline_layout)
     builtin.module {
       func.func @outs_fusion_fn() {
@@ -635,11 +635,11 @@
   ]>
 ]>
 hal.executable private @conv_dynamic {
-  hal.executable.variant public @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant public @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export public @conv_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @conv_dynamic() {
@@ -692,11 +692,11 @@
   ]>
 ]>
 hal.executable private @conv_static {
-  hal.executable.variant public @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant public @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export public @conv_static layout(#pipeline_layout)
     builtin.module {
       func.func @conv_static() {
@@ -735,7 +735,7 @@
   ]>
 ]>
 hal.executable private @conv_nchw_static {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @conv_nchw_static ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @conv_nchw_static() {
@@ -772,11 +772,11 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_static {
-  hal.executable.variant public @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant public @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 64 : index
-  }> {
+  }>) {
     hal.executable.export public @depthwise_conv_static layout(#pipeline_layout)
     builtin.module {
       func.func @depthwise_conv_static() {
@@ -819,11 +819,11 @@
   ]>
 ]>
 hal.executable private @thin_depthwise_conv_static {
-  hal.executable.variant public @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant public @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 64 : index
-  }> {
+  }>) {
     hal.executable.export public @thin_depthwise_conv_static layout(#pipeline_layout)
     builtin.module {
       func.func @thin_depthwise_conv_static() {
@@ -861,7 +861,7 @@
 // -----
 
 hal.executable private @pooling_nchw_max {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vnni,+adx,+clflushopt,+clwb,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+pku,+prfchw,+rdrnd,+rdseed,+sahf,+x87,+xsave,+xsavec,+xsaveopt,+xsaves", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf", ukernels = false}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vnni,+adx,+clflushopt,+clwb,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+pku,+prfchw,+rdrnd,+rdseed,+sahf,+x87,+xsave,+xsavec,+xsaveopt,+xsaves", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf", ukernels = false}>) {
     hal.executable.export public @pooling_nchw_max ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -901,11 +901,11 @@
   ]>
 ]>
 hal.executable private @generic_static {
-  hal.executable.variant public @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant public @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-pc-linux-gnu",
     native_vector_size = 64 : index
-  }> {
+  }>) {
     hal.executable.export public @generic_static layout(#pipeline_layout)
     builtin.module {
       func.func @generic_static() {
@@ -947,13 +947,13 @@
   ]>
 ]>
 hal.executable private @matmul_static  {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu",
     "embedded-elf-x86_64", {
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
       target_triple = "x86_64-none-elf",
       native_vector_size = 16 : index
-    }> {
+    }>) {
     hal.executable.export public @matmul_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_static() {
@@ -1001,7 +1001,7 @@
   }
 >
 hal.executable private @reduction {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @predict_dispatch_86 ordinal(0) layout(#pipeline_layout)
     builtin.module  {
       func.func @predict_dispatch_86(%arg0: !flow.dispatch.tensor<readonly:tensor<7x7x2048xf32>>,
@@ -1052,13 +1052,13 @@
   ]>
 ]>
 hal.executable private @matmul_i8_i8_i32_static  {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu",
     "embedded-elf-x86_64", {
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
       target_triple = "x86_64-none-elf",
       native_vector_size = 16 : index
-    }> {
+    }>) {
     hal.executable.export public @matmul_i8_i8_i32_static layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_i8_i8_i32_static() {
@@ -1105,7 +1105,7 @@
 #map0 = affine_map<()[s0, s1] -> (s0 * s1)>
 #map1 = affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>
 hal.executable private @gemm_unit_N {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @gemm_unit_N ordinal(0) layout(#pipeline_layout)
     builtin.module  {
       func.func @gemm_unit_N() {
@@ -1156,7 +1156,7 @@
   }
 >
 hal.executable private @gemm_unit_M_unit_N {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @gemm_unit_M_unit_N ordinal(0) layout(#pipeline_layout)
     builtin.module  {
       func.func @gemm_unit_M_unit_N() {
@@ -1203,7 +1203,7 @@
   }
 >
 hal.executable private @matmul_odd {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @matmul_odd ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_odd() {
@@ -1242,11 +1242,11 @@
   ]>
 ]>
 hal.executable private @generic_unit_dims_dynamic {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @generic_unit_dims_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @generic_unit_dims_dynamic() {
@@ -1297,11 +1297,11 @@
   ]>
 ]>
 hal.executable private @reduce_to_scalar_static {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @reduce_to_scalar_static layout(#pipeline_layout)
     builtin.module {
       func.func @reduce_to_scalar_static() {
@@ -1340,11 +1340,11 @@
   ]>
 ]>
 hal.executable private @reduce_to_scalar_dynamic {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @reduce_to_scalar_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @reduce_to_scalar_dynamic() {
@@ -1385,11 +1385,11 @@
   ]>
 ]>
 hal.executable private @scalar {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     target_triple = "x86_64-unknown-linux-gnu",
     native_vector_size = 16 : index
-  }> {
+  }>) {
     hal.executable.export @scalar layout(#pipeline_layout)
     builtin.module {
       func.func @scalar() {
@@ -1436,7 +1436,7 @@
 
 
 hal.executable private @transpose_8x8 {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_8x8 layout(#pipeline_layout)
     builtin.module {
       func.func @transpose_8x8() {
@@ -1466,9 +1466,9 @@
 // -----
 
 hal.executable private @multi_root {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
       cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
-      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @multi_root ordinal(0)
         layout(#hal.pipeline.layout<
             push_constants = 0,
@@ -1543,12 +1543,12 @@
   ]>
 ]>
 hal.executable private @pack  {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
     cpu_features = "+avx512f",
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 64 : index,
     target_triple = "x86_64-none-elf"
-  }> {
+  }>) {
   hal.executable.export public @pack layout(#pipeline_layout)
     builtin.module {
       func.func @pack() {
@@ -1582,12 +1582,12 @@
   ]>
 ]>
 hal.executable private @unpack_generic_pack  {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
     cpu_features = "+avx512f",
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "x86_64-none-elf"
-  }> {
+  }>) {
   hal.executable.export public @unpack_generic_pack layout(#pipeline_layout)
     builtin.module {
       func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) {
@@ -1631,7 +1631,7 @@
 // -----
 
 hal.executable private @elem_pack {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @elem_pack ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %c1 = arith.constant 1 : index
@@ -1679,7 +1679,7 @@
   ]>
 ]>
 hal.executable private @transpose_pack  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}> {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}>) {
     hal.executable.export @transpose_pack layout(#pipeline_layout)
     builtin.module {
       func.func @transpose_pack() {
@@ -1722,7 +1722,7 @@
   ]>
 ]>
 hal.executable private @reduction_broadcast_pack  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}> {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}>) {
     hal.executable.export @reduction_broadcast_pack layout(#pipeline_layout)
     builtin.module {
       func.func @reduction_broadcast_pack() {
@@ -1788,7 +1788,7 @@
 // -----
 
 hal.executable private @unpack_elem {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @unpack_elem ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -1826,7 +1826,7 @@
 // -----
 
 hal.executable private @quant_model {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @quant_model ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer, ReadOnly>, <4, storage_buffer, ReadOnly>, <5, storage_buffer, ReadOnly>, <6, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -1873,10 +1873,10 @@
 // -----
 
 hal.executable private @no_compute_ops {
-  hal.executable.variant public @embedded_elf_x86_64, target = <
+  hal.executable.variant public @embedded_elf_x86_64 target(<
       "llvm-cpu", "embedded-elf-x86_64",
       {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
-       native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = false}> {
+       native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = false}>) {
     hal.executable.export public @test ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
@@ -1914,7 +1914,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 
 hal.executable private @non_trivial_program {
-  hal.executable.variant public @system_elf_x86_64, target = #executable_target_system_elf_x86_64_ {
+  hal.executable.variant public @system_elf_x86_64 target(#executable_target_system_elf_x86_64_) {
     hal.executable.export public @non_trivial_program ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %c1 = arith.constant 1 : index
@@ -1976,7 +1976,7 @@
 #map3 = affine_map<(d0, d1, d2) -> (d0)>
 module {
   hal.executable public @i4_dequant_matvec {
-    hal.executable.variant public @llvm, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @llvm target(#executable_target_embedded_elf_x86_64_) {
       hal.executable.export public @i4_dequant_matvec_f32 layout(#pipeline_layout)
       builtin.module {
         func.func @i4_dequant_matvec_f32() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_conv_pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_conv_pipeline_tests.mlir
index 0835f7c..3a49ce9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_conv_pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_conv_pipeline_tests.mlir

@@ -6,7 +6,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module {
   hal.executable private @pad_conv_2d {
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
       hal.executable.export public @pad_conv_2d_nchw_fchw_1x320x64x64x320x3x3 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_pipeline_tests.mlir
index fe2e5dc..973771f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pad_pipeline_tests.mlir

@@ -1,10 +1,10 @@
 // RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))" --split-input-file %s | FileCheck %s
 
 hal.executable private @pad_only {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
       cpu = "generic", cpu_features = "",
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
-      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @pad_only_dispatch ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0,
             sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
@@ -60,10 +60,10 @@
 // -----
 
 hal.executable private @pad_with_producer {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
       cpu = "generic", cpu_features = "",
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
-      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @pad_with_producer_dispatch ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0,
             sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
@@ -158,10 +158,10 @@
 // -----
 
 hal.executable private @pad_consumer_fusion {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {
       cpu = "generic", cpu_features = "",
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
-      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+      native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @pad_consumer_fusion_dispatch ordinal(0)
         layout(#hal.pipeline.layout<push_constants = 0,
             sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel_and_vectorize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel_and_vectorize.mlir
index e863b6b..6722802 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel_and_vectorize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel_and_vectorize.mlir

@@ -13,7 +13,7 @@
   ]>
 ]>
 hal.executable private @preset_config_matmul  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64"> {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64">) {
     hal.executable.export @no_peel_static_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -68,7 +68,7 @@
   ]>
 ]>
 hal.executable private @preset_config_matmul  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64"> {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64">) {
     hal.executable.export @peel_static_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -135,7 +135,7 @@
   ]>
 ]>
 hal.executable private @preset_config_matmul  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64"> {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64">) {
     hal.executable.export @peel_dynamic_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir
index 68f1078..90285a0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir

@@ -20,7 +20,7 @@
     #hal.descriptor_set.binding<1, storage_buffer>]
   >]>
 hal.executable private @check_no_cse {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @check_no_cse ordinal(0) layout(#pipeline_layout5) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -79,7 +79,7 @@
   ]>
 ]>
 hal.executable private @preset_pad_config_matmul  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64"> {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64">) {
     hal.executable.export @preset_pad_config_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -124,7 +124,7 @@
   ]>
 ]>
 hal.executable private @preset_pad_config_dynamic_matmul  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64"> {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64">) {
     hal.executable.export @preset_pad_config_dynamic_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
@@ -181,7 +181,7 @@
     #hal.descriptor_set.binding<2, storage_buffer>]
   >]>
 hal.executable private @pad_partially_unaligned_matmul {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @pad_partially_unaligned_matmul ordinal(0) layout(#pipeline_layout5) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -243,7 +243,7 @@
     #hal.descriptor_set.binding<2, storage_buffer>]
   >]>
 hal.executable private @batch_matmul_dynamic {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @batch_matmul_dynamic ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -295,7 +295,7 @@
     #hal.descriptor_set.binding<1, storage_buffer>]
   >]>
 hal.executable private @check_buffer_ops_vectorization {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @check_buffer_ops_vectorization ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -328,13 +328,13 @@
 // -----
 
 hal.executable private @vectorize_fill_conv2d_generic {
-  hal.executable.variant public @embedded_elf_x86_64, target = #hal.executable.target<
+  hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<
     "llvm-cpu", "embedded-elf-x86_64", {
       cpu_features = "",
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
       native_vector_size = 16 : index,
       target_triple = "x86_64-none-elf"
-    }> {
+    }>) {
     hal.executable.export public @vectorize_fill_conv2d_generic ordinal(0) layout(
       #hal.pipeline.layout<push_constants = 0, sets = [
         #hal.descriptor_set.layout<0, bindings = [
@@ -398,7 +398,7 @@
 // -----
 
 hal.executable private @multi_result {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @multi_result ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -446,7 +446,7 @@
 // -----
 
 hal.executable private @quant_matmul_fusion {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}> {
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
     hal.executable.export public @quant_matmul_fusion ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer, ReadOnly>, <4, storage_buffer, ReadOnly>, <5, storage_buffer, ReadOnly>, <6, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -521,11 +521,11 @@
 // -----
 
 hal.executable private @mmt4d_ukernel {
-  hal.executable.variant public @embedded_elf_x86_64, target = <"llvm-cpu", "embedded-elf-x86_64",
+  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64",
       {cpu = "generic", cpu_features = "",
        data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
        native_vector_size = 16 : index, target_triple = "x86_64-none-elf",
-       ukernels = true}> {
+       ukernels = true}>) {
     hal.executable.export public @ukernel_dispatch ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6
@@ -557,12 +557,12 @@
 // -----
 
 hal.executable private @ukernel_pass_through {
-  hal.executable.variant public @embedded_elf_x86_64, target = <
+  hal.executable.variant public @embedded_elf_x86_64 target(<
     "llvm-cpu", "embedded-elf-x86_64", {
       cpu = "generic", cpu_features = "",
       data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
       native_vector_size = 16 : index, target_triple = "x86_64-none-elf",
-      ukernels = false}> {
+      ukernels = false}>) {
     hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout<
       push_constants = 2, sets = [
         <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>,
@@ -640,7 +640,7 @@
 }>
 
 hal.executable private @aarch64_ssve__cpu_buffer_ops_tile_and_vectorize {
-  hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
       lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>,
       translation_info = #iree_codegen.translation_info<CPUBufferOpsTileAndVectorize>
@@ -668,7 +668,7 @@
 // CHECK: func.func @dispatch() attributes {arm_locally_streaming}
 
 hal.executable private @aarch64_ssve__cpu_double_tiling_peeling_expert {
-  hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
       lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>,
       translation_info = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
@@ -696,7 +696,7 @@
 // CHECK: func.func @dispatch() attributes {arm_locally_streaming}
 
 hal.executable private @aarch64_ssve__cpu_conv_tile_and_decompose_expert {
-  hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
       lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>,
       translation_info = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
@@ -734,7 +734,7 @@
 }>
 
 hal.executable private @aarch64_ssve_sve_disabled {
-  hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64_no_sve {
+  hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64_no_sve) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
       lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>,
       translation_info = #iree_codegen.translation_info<CPUBufferOpsTileAndVectorize>

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/split_reduction_pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/split_reduction_pipeline_tests.mlir
index b89d2fa..85aa23c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/split_reduction_pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/split_reduction_pipeline_tests.mlir

@@ -7,7 +7,7 @@
 #map2 = affine_map<(d0, d1) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_pass1_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_supported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -60,7 +60,7 @@
 #map2 = affine_map<(d0, d1) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_pass1_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_reduction_no_dynamic_perfect_tiling_float_supported_with_flag ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -115,7 +115,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_pass2_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_reduction_next_dynamic_supported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -163,7 +163,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_pass3_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_reduction_next_imperfect_tiling_supported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -207,7 +207,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_fail1_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_dynamic_reduction_unsupported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -244,7 +244,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_fail2_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_innermost_imperfect_reduction_unsupported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -279,7 +279,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_fail3_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_not_innermost_reduction_unsupported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -315,7 +315,7 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 hal.executable private @split_reduction_fail4_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @split_reduction_double_reduction_unsupported ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/test_config_mmt4d.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/test_config_mmt4d.mlir
index 98b7115..6d55666 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/test_config_mmt4d.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/test_config_mmt4d.mlir

@@ -12,7 +12,7 @@
 #map3 = affine_map<(d0)[s0] -> (s0, -d0 + 96)>
 #map4 = affine_map<(d0)[s0] -> (s0, -d0 + 128)>
 hal.executable private @mmt4d_384x384x512_4x1x4_dispatch_0 {
-  hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64_ {
+  hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64_) {
     hal.executable.export public @mmt4d_384x384x512_4x1x4_dispatch_0 layout(#pipeline_layout)
     builtin.module  {
       func.func @mmt4d_384x384x512_4x1x4_dispatch_0() {
@@ -54,7 +54,7 @@
   ]>
 ]>
 hal.executable private @batch_mmt4d {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @batch_mmt4d ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @batch_mmt4d() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
index 24c60c3..905690f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir

@@ -5,7 +5,7 @@
 #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
 
 hal.executable private @pad_matmul_static_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export public @pad_matmul_static_dispatch_0 ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @pad_matmul_static_dispatch_0() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_iree_tile_to_forall.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_iree_tile_to_forall.mlir
index 5239409..ac1f307 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_iree_tile_to_forall.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_iree_tile_to_forall.mlir

@@ -12,7 +12,7 @@
 // CHECK: #[[$NUM_THREADS_MAP:.*]] = affine_map<(d0) -> (d0 * 32)>
 
 hal.executable private @matmul_static_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
 
     hal.executable.export public @matmul_static_dispatch_0_matmul_1024x4096x12345 ordinal(0) layout(#pipeline_layout) {
     // Check that num_threads is reflected in the workgroup size.
@@ -69,8 +69,8 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 
 hal.executable private @matmul_static_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
-  
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
+
     hal.executable.export public @elementwise_out_of_order_block_id ordinal(0) layout(#pipeline_layout) {
     // Check that num_threads is consistent with the specified mapping
     // CHECK-LABEL: hal.executable.export public @elementwise_out_of_order_block_id
@@ -82,8 +82,8 @@
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
       hal.return %x, %y, %z : index, index, index
-    }   
-  
+    }
+
     builtin.module {
       func.func @elementwise_out_of_order_block_id() {
         // CHECK-LABEL: func.func @elementwise_out_of_order_block_id
@@ -98,15 +98,15 @@
           indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                            affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
           iterator_types = ["parallel", "parallel", "parallel"]}
-          ins(%2 : tensor<3x5x8xf32>) outs(%empty : tensor<3x5x8xf32>) {   
+          ins(%2 : tensor<3x5x8xf32>) outs(%empty : tensor<3x5x8xf32>) {
           ^bb0(%in: f32, %in_0: f32):
-            %4 = math.sqrt %in : f32 
-            linalg.yield %4 : f32 
+            %4 = math.sqrt %in : f32
+            linalg.yield %4 : f32
           } -> tensor<3x5x8xf32>
         flow.dispatch.tensor.store %3, %1, offsets = [0, 0, 0], sizes = [3, 5, 8], strides = [1, 1, 1] : tensor<3x5x8xf32> -> !flow.dispatch.tensor<writeonly:tensor<3x5x8xf32>>
         return
-      }   
-    }   
+      }
+    }
   }
 }
 
@@ -123,8 +123,8 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 
 hal.executable private @matmul_static_dispatch_0 {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
-  
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
+
     hal.executable.export public @vecadd2d_dispatch_0_generic_9x512_f32 ordinal(0) layout(#pipeline_layout) {
     // Check that num_threads is consistent with the specified mapping
     // CHECK-LABEL: hal.executable.export public @vecadd2d_dispatch_0_generic_9x512_f32
@@ -136,8 +136,8 @@
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
       hal.return %x, %y, %z : index, index, index
-    }   
-  
+    }
+
     builtin.module {
       func.func @vecadd2d_dispatch_0_generic_9x512_f32() {
         %c18432 = arith.constant 18432 : index

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transpose_avx2_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transpose_avx2_lowering.mlir
index 0e1c27f..e93570b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transpose_avx2_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transpose_avx2_lowering.mlir

@@ -17,7 +17,7 @@
 
 
 hal.executable private @transpose_10_8x8_pattern {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_10_8x8_pattern layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -75,7 +75,7 @@
 
 
 hal.executable private @transpose_021_8x8_pattern {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_021_8x8_pattern layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -133,7 +133,7 @@
 
 
 hal.executable private @transpose_201_8x8_pattern {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_201_8x8_pattern layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -191,7 +191,7 @@
 
 
 hal.executable private @transpose_210_8x8_pattern {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_210_8x8_pattern layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -249,7 +249,7 @@
 
 
 hal.executable private @transpose_120_8x8_pattern {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_120_8x8_pattern layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -307,7 +307,7 @@
 
 
 hal.executable private @transpose_102 {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @transpose_102 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -357,7 +357,7 @@
 
 
 hal.executable private @test_no_avx2_feature {
-  hal.executable.variant @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64_) {
     hal.executable.export @test_no_avx2_feature layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_masking.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_masking.mlir
index 02eec62..7e0d4da 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_masking.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_masking.mlir

@@ -12,11 +12,11 @@
   ]>
 ]>
 hal.executable private @preset_config_generic_add  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 32 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @mask_dynamic_generic_add layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -80,11 +80,11 @@
   ]>
 ]>
 hal.executable private @preset_config_reduction  {
-  hal.executable.variant @system_elf_x86_64, target = <"llvm-cpu", "system-elf-x86_64", {
+  hal.executable.variant @system_elf_x86_64 target(<"llvm-cpu", "system-elf-x86_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 32 : index,
     target_triple = "x86_64-unknown-linux-gnu"
-  }> {
+  }>) {
     hal.executable.export @mask_dynamic_reduction layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -138,11 +138,11 @@
   ]>
 ]>
 hal.executable private @preset_config_generic_add  {
-  hal.executable.variant @embedded_elf_rv32, target = <"llvm-cpu", "embedded-elf-riscv_32", {
+  hal.executable.variant @embedded_elf_rv32 target(<"llvm-cpu", "embedded-elf-riscv_32", {
       data_layout = "e-m:e-p:32:32-i64:64-n32-S128",
       native_vector_size = 32 : index,
       target_triple = "riscv32-none-elf"
-    }> {
+    }>) {
     hal.executable.export @mask_dynamic_generic_add layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -207,11 +207,11 @@
   ]>
 ]>
 hal.executable private @preset_config_generic_add  {
-  hal.executable.variant @embedded_elf_rv32, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @embedded_elf_rv32 target(<"llvm-cpu", "embedded-elf-arm_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @mask_dynamic_generic_add layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -269,12 +269,12 @@
   ]>
 ]>
 hal.executable private @preset_config_matmul  {
-  hal.executable.variant @llvm, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @llvm target(<"llvm-cpu", "embedded-elf-arm_64", {
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     cpu_features = "+sve",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @mask_matmul_sve layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -328,12 +328,12 @@
   ]>
 ]>
 hal.executable private @preset_config_generic_add  {
-  hal.executable.variant @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {
+  hal.executable.variant @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {
     cpu_features = "+sve",
     data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     native_vector_size = 16 : index,
     target_triple = "aarch64-none-elf"
-  }> {
+  }>) {
     hal.executable.export @mask_dynamic_generic_add layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_nd_extract.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_nd_extract.mlir
index 7e7cdb0..ccda83c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_nd_extract.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_nd_extract.mlir

@@ -2,7 +2,7 @@
 
 module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "system-elf-riscv_64", {cpu = "generic-rv64", cpu_features = "+m,+a,+f,+d,+v", data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", native_vector_size = 64 : index, target_triple = "riscv64"}>]}>]} {
   hal.executable private @main_dispatch_77 {
-    hal.executable.variant public @system_elf_riscv_64, target = <"llvm-cpu", "system-elf-riscv_64", {cpu = "generic-rv64", cpu_features = "+m,+a,+f,+d,+v", data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", native_vector_size = 64 : index, target_triple = "riscv64"}> {
+    hal.executable.variant public @system_elf_riscv_64 target(<"llvm-cpu", "system-elf-riscv_64", {cpu = "generic-rv64", cpu_features = "+m,+a,+f,+d,+v", data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", native_vector_size = 64 : index, target_triple = "riscv64"}>) {
       hal.executable.export public @main_dispatch_77_generic_1x257x257x21 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir
index 4db2814..73d24ba 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir

@@ -4,7 +4,7 @@
 // RUN: FileCheck --check-prefix=CHECK %s
 
 hal.executable @_attention_dispatch_0 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @_attention_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test.mlir
index 36640c9..82995d9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test.mlir

@@ -7,7 +7,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable private @conv2d_1x230x230x3_7x7x3x64_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @conv2d_1x230x230x3_7x7x3x64 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7
@@ -51,7 +51,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable private @conv_nchw_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @conv_nchw ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
index 9fad7b3..fdc4ce6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir

@@ -11,7 +11,7 @@
   ]>
 ]>
 hal.executable @abs_ex_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @abs_ex_dispatch_0 layout(#pipeline_layout)
     builtin.module {
       func.func @abs_ex_dispatch_0() {
@@ -54,7 +54,7 @@
   ]>
 ]>
 hal.executable @abs_dynamic {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @abs_dynamic layout(#pipeline_layout)
     builtin.module {
       func.func @abs_dynamic() {
@@ -115,7 +115,7 @@
   ]>
 ]>
 hal.executable @dead_symbol {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @dead_symbol layout(#pipeline_layout)
     builtin.module {
       func.func @dead_symbol() {
@@ -153,7 +153,7 @@
   ]>
 ]>
 hal.executable @mixed_type {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @mixed_type layout(#pipeline_layout)
     builtin.module {
       func.func @mixed_type() {
@@ -193,7 +193,7 @@
   ]>
 ]>
 hal.executable @shared_memory_lowering {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @shared_memory_lowering layout(#pipeline_layout)
     builtin.module {
       func.func @shared_memory_lowering() {
@@ -233,7 +233,7 @@
   ]>
 ]>
 hal.executable @shared_memory_dealloc_elision {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @shared_memory_dealloc_elision layout(#pipeline_layout)
     builtin.module {
 // CHECK-LABEL: llvm.func @shared_memory_dealloc_elision() {
@@ -259,7 +259,7 @@
   ]>
 ]>
 hal.executable @shared_memory_lowering_aligned_alloc {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @shared_memory_lowering_aligned_alloc layout(#pipeline_layout)
     builtin.module {
       func.func @shared_memory_lowering_aligned_alloc() {
@@ -298,18 +298,18 @@
   ]>
 ]>
 hal.executable @check_not_readonly {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @check_not_readonly layout(#pipeline_layout)
     builtin.module {
       func.func @check_not_readonly() {
         %c0 = arith.constant 0 : index
         %c128 = arith.constant 128 : index
         %1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<16xi32>
-        %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>        
+        %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
         %b11 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) flags(ReadOnly) : memref<16xi32>
-        %b12 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c128) : memref<16xf32, strided<[1], offset: 32>>        
+        %b12 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c128) : memref<16xf32, strided<[1], offset: 32>>
         %b21 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) flags(ReadOnly) : memref<16xi32>
-        %b22 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>        
+        %b22 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
         %2 = hal.interface.binding.subspan set(1) binding(3) type(storage_buffer) : memref<16xf32>
         %3 = gpu.block_id x
         %4 = gpu.block_dim x
@@ -341,7 +341,7 @@
   ]>
 ]>
 hal.executable @complex {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @complex layout(#pipeline_layout)
     builtin.module {
       func.func @complex() {
@@ -377,7 +377,7 @@
   ]>
 ]>
 hal.executable @shared_memory_lowering_index {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @shared_memory_lowering_index layout(#pipeline_layout)
     builtin.module {
       func.func @shared_memory_lowering_index() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
index d20550a..6273a8d 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir

@@ -11,7 +11,7 @@
   ]>
 ]>
 hal.executable @abs_ex_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @abs_ex_dispatch_0 layout(#pipeline_layout)
     builtin.module {
       func.func @abs_ex_dispatch_0() {
@@ -53,21 +53,21 @@
   ]>
 ]>
 hal.executable @abs_ex_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @abs_ex_dispatch_0 layout(#pipeline_layout)
     builtin.module {
       func.func @reduction_maximum() {
-      %c0 = arith.constant 0 : index 
-      %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : 
-            memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>> 
-      %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : memref<32x64x64xf32, 
-            strided<[4096, 64, 1], offset: ?>> 
-      %2 = vector.load %0[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32> 
-      %3 = vector.reduction <maximumf>, %2 : vector<2xf32> into f32 
-      %4 = vector.splat %3 : vector<2xf32> 
-      vector.store %4, %1[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32> 
-      return 
-      } 
+      %c0 = arith.constant 0 : index
+      %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) :
+            memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>
+      %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : memref<32x64x64xf32,
+            strided<[4096, 64, 1], offset: ?>>
+      %2 = vector.load %0[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32>
+      %3 = vector.reduction <maximumf>, %2 : vector<2xf32> into f32
+      %4 = vector.splat %3 : vector<2xf32>
+      vector.store %4, %1[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32>
+      return
+      }
     }
   }
 }

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
index 77136eb..0b7ba91 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir

@@ -16,7 +16,7 @@
 #map3 = affine_map<(d0) -> (256, -d0 + 1024)>
 #map4 = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
 hal.executable private @dot_dispatch_0  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @dot_dispatch_0 layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 1 : index, 1 : index]
@@ -99,7 +99,7 @@
   ]>
 ]>
 hal.executable private @batch_matmul_func  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @batch_matmul_func layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [8 : index, 8 : index, 1 : index]
@@ -184,7 +184,7 @@
 #map3 = affine_map<(d0) -> (32, -d0 + 1024)>
 #map4 = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
 hal.executable private @dot_dispatch_0  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @dot_dispatch_0 layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 8 : index, 1 : index]
@@ -270,7 +270,7 @@
 ]>
 // Pure reduction case, skip tiling.
 hal.executable @reduction_dispatch {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @predict_dispatch_153 layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [1: index, 1: index, 1: index]
@@ -317,7 +317,7 @@
   ]>
 ]>
 hal.executable private @conv_dispatch  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @conv_dispatch layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 1 : index, 1 : index]
@@ -388,7 +388,7 @@
   ]>
 ]>
 hal.executable private @contract_4d  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @contract_4d layout(#pipeline_layout) attributes {
       workgroup_size = [64 : index, 8 : index, 1 : index]
     }

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir
index 0aa771f..e77dd68 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir

@@ -2,7 +2,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))" %s | FileCheck %s
 
 hal.executable @warp_reduction_dispatch {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @forward_dispatch_0_generic_320x320x3x3 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
index 258beb0..f7efff8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable @add_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @add_dispatch_0 layout(#pipeline_layout)
   builtin.module {
     func.func @add_dispatch_0() {
@@ -52,7 +52,7 @@
   ]>
 ]>
 hal.executable private @dot_dispatch_1  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @dot_dispatch_1 layout(#pipeline_layout)
     builtin.module {
       func.func @dot_dispatch_1() {
@@ -92,7 +92,7 @@
   ]>
 ]>
 hal.executable private @unaligned_k  {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @unaligned_k layout(#pipeline_layout)
     builtin.module {
       func.func @unaligned_k() {
@@ -132,7 +132,7 @@
   ]>
 ]>
 hal.executable @reduction_dispatch {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @predict_dispatch_153 layout(#pipeline_layout)
     builtin.module {
       func.func @predict_dispatch_153() {
@@ -174,7 +174,7 @@
   ]>
 ]>
 hal.executable private @reduction_aligned2 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export public @reduction_aligned2 ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @reduction_aligned2() {
@@ -219,7 +219,7 @@
   ]>
 ]>
 hal.executable @copy_as_generic {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @copy_as_generic layout(#pipeline_layout)
     builtin.module {
       func.func @copy_as_generic() {
@@ -255,7 +255,7 @@
   ]>
 ]>
 hal.executable private @static_1d_fft_stage2 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @static_1d_fft_stage2 layout(#pipeline_layout)
     builtin.module {
       func.func @static_1d_fft_stage2() {
@@ -292,7 +292,7 @@
   ]>
 ]>
 hal.executable private @static_3d_fft_stage3 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @static_3d_fft_stage3 layout(#pipeline_layout)
     builtin.module {
       func.func @static_3d_fft_stage3() {
@@ -337,7 +337,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export public @_lowering_config_test_dispatch_1 layout(#pipeline_layout)
   builtin.module {
     func.func @_lowering_config_test_dispatch_1() {
@@ -384,7 +384,7 @@
   ]>
 ]>
 hal.executable private @sort_op {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @sort_op layout(#pipeline_layout)
     builtin.module {
       func.func @sort_op() {
@@ -429,7 +429,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @matmul_config_sm35 layout(#pipeline_layout)
   builtin.module {
     func.func @matmul_config_sm35() {
@@ -469,7 +469,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_config_sm80 layout(#pipeline_layout)
   builtin.module {
     func.func @matmul_config_sm80() {
@@ -509,7 +509,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}>) {
   hal.executable.export public @matmul_config_sm86 layout(#pipeline_layout)
   builtin.module {
     func.func @matmul_config_sm86() {
@@ -549,7 +549,7 @@
   ]>
 ]>
 hal.executable @contract_reduction {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}>) {
   hal.executable.export public @contract_reduction layout(#pipeline_layout)
   builtin.module {
     func.func @contract_reduction() {
@@ -598,7 +598,7 @@
   ]>
 ]>
 hal.executable @dynamic_pack_2x2 {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_86"}>) {
   hal.executable.export public @dynamic_pack_2x2 layout(#pipeline_layout)
   builtin.module {
     func.func @dynamic_pack_2x2() {
@@ -641,7 +641,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @large_matmul_f16 layout(#pipeline_layout)
   builtin.module {
     func.func @large_matmul_f16() {
@@ -688,7 +688,7 @@
   ]>
 ]>
 hal.executable @user_config {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @large_matmul_f32 layout(#pipeline_layout)
   builtin.module {
     func.func @large_matmul_f32() {
@@ -735,7 +735,7 @@
   ]>
 ]>
 hal.executable @inner_unit_dim {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @inner_unit_dim layout(#pipeline_layout)
   builtin.module {
     func.func @inner_unit_dim() {
@@ -784,7 +784,7 @@
   ]>
 ]>
 hal.executable @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32 layout(#pipeline_layout)
   builtin.module {
     func.func @forward_dispatch_1_conv_2d_nhwc_hwcf_256x112x112x64x7x7x3_f32() {
@@ -836,7 +836,7 @@
 
 
 hal.executable public @_main_dispatch_15 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @_main_dispatch_15_generic_512x4x42x42x64_f32 ordinal(0) layout(#hal.pipeline.layout<push_constants = 3, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -899,7 +899,7 @@
 ]>
 
 hal.executable public @i4_dequant_matvec {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @i4_dequant_matvec ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -966,7 +966,7 @@
         } -> tensor<4096x11008xf32>
         %41 = linalg.generic {
             indexing_maps = [
-                affine_map<(d0, d1, d2) -> (d0, d2)>, 
+                affine_map<(d0, d1, d2) -> (d0, d2)>,
                 affine_map<(d0, d1, d2) -> (d1, d2)>,
                 affine_map<(d0, d1, d2) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel", "reduction"]}

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
index a5641d9..6db7807 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [32 : index, 8 : index, 8 : index]
@@ -42,7 +42,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [32 : index, 8 : index, 2 : index]
@@ -74,7 +74,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 10 : index]
@@ -106,7 +106,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [48 : index, 2 : index, 1 : index]
@@ -138,7 +138,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 2 : index]
@@ -170,7 +170,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -202,7 +202,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [128 : index, 1 : index, 1 : index]
@@ -234,7 +234,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -266,7 +266,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -299,7 +299,7 @@
   ]>
 ]>
 hal.executable private @batch_matmul_func  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -357,7 +357,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [128 : index, 1 : index, 1 : index]
@@ -390,7 +390,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [128 : index, 1 : index, 1 : index]
@@ -422,7 +422,7 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @illegal layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [128 : index, 1 : index, 1 : index]
@@ -442,4 +442,4 @@
   }
 }
 
-// -----
\ No newline at end of file
+// -----

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
index 867ac4b..7acd071 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir

@@ -13,7 +13,7 @@
 #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable private @matmul_static_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @matmul_static_dispatch_0 ordinal(0) layout(#pipeline_layout)
       builtin.module {
         func.func @matmul_static_dispatch_0() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir
index 658559e..b9e0ae1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir

@@ -76,7 +76,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 #device_target_cuda = #hal.device.target<"cuda", {executable_targets = [#executable_target_cuda_nvptx_fb], legacy_sync}>
 hal.executable private @matmul_dispatch_0 {
-  hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export public @matmul_dispatch_0_matmul_2560x2560x2560 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
index 4102ca5..d425806 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir

@@ -13,7 +13,7 @@
   ]>
 ]>
 hal.executable @mma_fused_fp16 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -92,7 +92,7 @@
   ]>
 ]>
 hal.executable @mma_fused_f32 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -157,13 +157,13 @@
 //          CHECK:   llvm.insertvalue{{.*}} : !llvm.array<2 x vector<1xf32>>
 //          CHECK:   llvm.br
 //          CHECK:   nvvm.ldmatrix{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
-//  CHECK-COUNT-4:   llvm.extractvalue{{.*}} : !llvm.struct<(i32, i32, i32, i32)> 
+//  CHECK-COUNT-4:   llvm.extractvalue{{.*}} : !llvm.struct<(i32, i32, i32, i32)>
 //  CHECK-COUNT-2:   nvvm.mma.sync {{.*}} {layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<col>, multiplicandAPtxType = #nvvm.mma_type<tf32>, multiplicandBPtxType = #nvvm.mma_type<tf32>, shape = #nvvm.shape<m = 16, n = 8, k = 8>} : (i32, i32, f32) -> !llvm.struct<(f32, f32, f32, f32)>
 //  CHECK-COUNT-2:   llvm.inline_asm has_side_effects asm_dialect = att "cp.async.cg.shared.global [$0], [$1], $2, $3;\0A", "r,l,n,r" {{.*}}, {{.*}}, {{.*}}, {{.*}} : (!llvm.ptr<3>, !llvm.ptr<1>, i32, i32) -> ()
 //          CHECK:   nvvm.cp.async.commit.group
 //          CHECK:   nvvm.cp.async.wait.group 2
 //          CHECK:   nvvm.ldmatrix{{.*}} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)>
-//  CHECK-COUNT-4:   llvm.extractvalue{{.*}} : !llvm.struct<(i32, i32, i32, i32)> 
+//  CHECK-COUNT-4:   llvm.extractvalue{{.*}} : !llvm.struct<(i32, i32, i32, i32)>
 //          CHECK:   llvm.load{{.*}} : !llvm.ptr<3> -> f32
 //          CHECK:   llvm.insertvalue{{.*}} : !llvm.array<2 x vector<1xf32>>
 //          CHECK:   llvm.load{{.*}} : !llvm.ptr<3> -> f32

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
index 7c700f0..95a2c61 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir

@@ -11,7 +11,7 @@
   ]>
 ]>
 hal.executable @simpleMath_ex_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @add_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -55,7 +55,7 @@
   ]>
 ]>
 hal.executable @dot_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @dot_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -126,7 +126,7 @@
   ]>
 ]>
 hal.executable @dot_dispatch_0 {
-  hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export @dot_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -179,7 +179,7 @@
   ]>
 ]>
 hal.executable @conv2d_dispatch_0 {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @conv2d_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index, %arg7 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7
@@ -227,7 +227,7 @@
   ]>
 ]>
 hal.executable @simpleMath_ex_dispatch_0 {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @add_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -267,7 +267,7 @@
   ]>
 ]>
 hal.executable @reduction_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @reduction layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -314,7 +314,7 @@
   ]>
 ]>
 hal.executable @vector_add_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @vector_add_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -367,7 +367,7 @@
   ]>
 ]>
 hal.executable @vector_reduction_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @vector_reduction_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -413,7 +413,7 @@
   ]>
 ]>
 hal.executable @mma_fused {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -496,7 +496,7 @@
   ]>
 ]>
 hal.executable @mma_fused_fp16 {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -583,7 +583,7 @@
 #map5 = affine_map<(d0)[s0] -> (-d0 + 32, s0)>
 #map6 = affine_map<(d0)[s0] -> (-d0 + 64, s0)>
   hal.executable @large_dot_general_dispatch_0 {
-    hal.executable.variant public @cuda, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export @large_dot_general_dispatch_0 layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 :index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -663,7 +663,7 @@
 #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
 #map6 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
   hal.executable public @split_k_gemm {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @split_k_gemm ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index, %arg4 : index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -726,7 +726,7 @@
 ]>
 #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>
   hal.executable public @pooling_dynamic {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @pooling_dynamic ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 :index, %arg4 : index, %arg5 : index, %arg6 : index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5, %arg6
@@ -771,7 +771,7 @@
   ]>
 ]>
 hal.executable @warp_reduction_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @warp_reduction_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -824,7 +824,7 @@
   ]>
 ]>
 hal.executable @warp_reduction_broadcast_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @warp_reduction_broadcast_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -873,7 +873,7 @@
 //         CHECK:     nvvm.barrier0
 //         CHECK:     llvm.load {{.*}} : !llvm.ptr<3> -> f32
 // CHECK-COUNT-3:     nvvm.shfl.sync  bfly
-//         CHECK:     llvm.fdiv %{{.*}}, %{{.*}} 
+//         CHECK:     llvm.fdiv %{{.*}}, %{{.*}}
 //         CHECK:     llvm.store %{{.*}}, %{{.*}} {alignment = 4 : i64} : vector<4xf32>, !llvm.ptr<1>
 
 // -----
@@ -885,7 +885,7 @@
   ]>
 ]>
 hal.executable private @shared_mem_alloc {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @shared_mem_alloc ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5
@@ -936,7 +936,7 @@
 #map0 = affine_map<(d0, d1) -> (d1, d0)>
 #map1 = affine_map<(d0, d1) -> (d0, d1)>
 hal.executable private @shared_mem_transpose  {
-  hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant @cuda target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export @shared_mem_transpose layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir
index c404633..b7f2235 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir

@@ -7,7 +7,7 @@
   ]>
 ]>
 hal.executable @static_pack {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @static_pack layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
index 099bf20..20d4ac7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir

@@ -7,7 +7,7 @@
   ]>
 ]>
 hal.executable @warp_reduction_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @warp_reduction_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -108,7 +108,7 @@
   ]>
 ]>
 hal.executable @warp_reduction_broadcast_dispatch {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @warp_reduction_broadcast_dispatch layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -199,7 +199,7 @@
   ]>
 ]>
 hal.executable @softmax {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
   hal.executable.export @softmax layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir
index f2af399..20213aa 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir

@@ -7,7 +7,7 @@
   ]>
 ]>
 hal.executable @softmax {
-hal.executable.variant @rocm, target = <"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}> {
+hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}>) {
   hal.executable.export @softmax layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
index 1596594..774522a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))" %s | FileCheck %s
 
 hal.executable @small_reduction {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @small_reduction ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -47,7 +47,7 @@
 // -----
 
 hal.executable @group_reduction {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_reduction ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -110,7 +110,7 @@
 // -----
 
 hal.executable @group_elementwise_reduction_elementwise {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_elementwise_reduction_elementwise ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -181,7 +181,7 @@
 // -----
 
 hal.executable @group_reduction_larger {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_reduction_larger ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -245,7 +245,7 @@
 // -----
 
 hal.executable @group_reduction_1d {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_reduction_1d ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -278,7 +278,7 @@
 // -----
 
 hal.executable @group_elementwise_reduction_elementwise_4d {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_elementwise_reduction_elementwise_4d ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -320,7 +320,7 @@
 // -----
 
 hal.executable @group_reduction_i8_12345 {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
   hal.executable.export public @group_reduction_i8_12345 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -404,12 +404,12 @@
 #map1 = affine_map<(d0, d1) -> (d0)>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 #device_target_cuda = #hal.device.target<"cuda", {executable_targets = [#executable_target_cuda_nvptx_fb], legacy_sync}>
-  
+
 hal.executable @reduction_2d_trailing_elementwise_static_dispatch_0 {
-  hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export public @reduction_2d_trailing_elementwise_static_dispatch_0_generic_128x10_f32 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
-      %x, %y, %z = flow.dispatch.workgroup_count_from_slice 
+      %x, %y, %z = flow.dispatch.workgroup_count_from_slice
       hal.return %x, %y, %z : index, index, index
     }
     builtin.module {
@@ -466,7 +466,7 @@
 // -----
 
 hal.executable private @i4_dequant_matvec {
-  hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @i4_dequant_matvec ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer, ReadOnly>, <4, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir
index eff44d7..c75c3db 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))" %s | FileCheck %s
 
 hal.executable @group_reduction_1d {
-hal.executable.variant public @rocm_hsaco_fb, target = <"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}> {
+hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}>) {
   hal.executable.export public @group_reduction_1d ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -34,7 +34,7 @@
 // -----
 
 hal.executable private @i4_dequant_matvec {
-  hal.executable.variant public @rocm_hsaco_fb, target = <"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}> {
+  hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb", {target_arch = "gfx1100"}>) {
     hal.executable.export public @i4_dequant_matvec ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer, ReadOnly>, <3, storage_buffer, ReadOnly>, <4, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
index 10e91ec..adb5caa 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir

@@ -11,7 +11,7 @@
   ]>
 ]>
 hal.executable @simpleMath_ex_dispatch_0 {
-  hal.executable.variant @rocm, target = <"rocm", "rocm-hsaco-fb"> {
+  hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
   hal.executable.export @add_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1
@@ -55,7 +55,7 @@
   ]>
 ]>
 hal.executable @dot_dispatch_0 {
-  hal.executable.variant @rocm, target = <"rocm", "rocm-hsaco-fb"> {
+  hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
     hal.executable.export @dot_dispatch_0 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
index ff0f7fa..77b98db 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir

@@ -22,10 +22,10 @@
 #device_target_cuda = #hal.device.target<"cuda", {executable_targets = [#executable_target_cuda_nvptx_fb], legacy_sync}>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable private @batch_matmul_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @batch_matmul_dispatch_0_generic_128x80x320x32_f32 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device):
-        %x, %y, %z = flow.dispatch.workgroup_count_from_slice 
+        %x, %y, %z = flow.dispatch.workgroup_count_from_slice
         hal.return %x, %y, %z : index, index, index
       }
       builtin.module {
@@ -95,7 +95,7 @@
 // CHECK:   %[[TILED_LINALG:.+]], %[[LOOPS:.+]] = transform.structured.tile_using_for %tiled_op
 // DEFAULT:   [0, 0, 0, 16]
 // OPTIONS:   [0, 0, 0, 8]
-// CHECK:   %[[PADDED:.+]], %{{.*}}, %{{.+}} = transform.structured.pad %tiled_linalg_op 
+// CHECK:   %[[PADDED:.+]], %{{.*}}, %{{.+}} = transform.structured.pad %tiled_linalg_op
 // CHECK:     pack_paddings = [1, 1, 1, 1], pad_to_multiple_of = [1, 1, 1, 1], padding_dimensions = [0, 1, 2, 3]
 // CHECK:     padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]}
 // CHECK:   %[[V3:.+]] = get_producer_of_operand %[[PADDED]][2]
@@ -104,7 +104,7 @@
 // CHECK:   transform.iree.apply_licm
 // CHECK:   transform.iree.apply_cse
 // CHECK:   %[[FILL:.+]] = transform.structured.match ops{["linalg.fill"]}
-// CHECK:   apply_patterns 
+// CHECK:   apply_patterns
 // CHECK:   transform.iree.apply_licm
 // CHECK:   transform.iree.apply_cse
 // CHECK:   transform.structured.match ops{["tensor.parallel_insert_slice"]}
@@ -113,7 +113,7 @@
 // CHECK:   %[[RHS:.+]] = get_producer_of_operand %[[PADDED]][1]
 // CHECK:   %[[RHS_DPS:.+]] = transform.structured.rewrite_in_destination_passing_style %[[RHS]]
 
-// CHECK:   transform.structured.tile_using_forall %[[LHS]] 
+// CHECK:   transform.structured.tile_using_forall %[[LHS]]
 // DEFAULT:  num_threads [1, 32, 4] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
 // OPTIONS:  num_threads [1, 64, 2] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
 // CHECK:   apply_patterns
@@ -122,10 +122,10 @@
 // CHECK:   transform.structured.match ops{["scf.if"]}
 // CHECK:   transform.scf.take_assumed_branch %{{.*}} take_else_branch
 
-// CHECK:   transform.structured.tile_using_forall %[[RHS_DPS]]  
+// CHECK:   transform.structured.tile_using_forall %[[RHS_DPS]]
 // DEFAULT:  num_threads [8, 16, 1] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
 // OPTIONS:  num_threads [2, 8, 8] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
-// CHECK:   apply_patterns 
+// CHECK:   apply_patterns
 // CHECK:   transform.iree.apply_licm
 // CHECK:   transform.iree.apply_cse
 
@@ -139,7 +139,7 @@
 // CHECK:   transform.structured.tile_using_forall
 // DEFAULT:  num_threads [1, 2, 64] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
 // OPTIONS:  num_threads [1, 4, 32] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
-// CHECK:   apply_patterns 
+// CHECK:   apply_patterns
 // CHECK:   transform.iree.apply_licm
 // CHECK:   transform.iree.apply_cse
 
@@ -211,7 +211,7 @@
 // OPTIONS:   factor = 3
 // CHECK:   apply_patterns
 // CHECK:     transform.apply_patterns.vector.transfer_to_scf   max_transfer_rank = 1 full_unroll = true
-// CHECK:   apply_patterns 
+// CHECK:   apply_patterns
 // CHECK:   transform.iree.apply_licm
 // CHECK:   transform.iree.apply_cse
 // CHECK:   transform.iree.create_async_groups

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir
index ca28ed6..eca430c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt %s --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target{test-lowering-configuration})))" --iree-codegen-llvmgpu-enable-transform-dialect-implicit-gemm-strategy | FileCheck %s
 
 hal.executable @nchw_convolution {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @nchw_convolution ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -72,7 +72,7 @@
 // -----
 
 hal.executable @nhwc_convolution {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @nhwc_convolution ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -118,7 +118,7 @@
 // -----
 
 hal.executable @unaligned_convolution {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @unaligned_convolution ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir
index 150dc1f..c596a06 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir

@@ -40,7 +40,7 @@
 // RUN: | FileCheck --check-prefix=SMALL %s
 
 hal.executable @matmul_1 {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_1 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -102,7 +102,7 @@
 // CHECK: apply_patterns to %{{.*}} {
 // CHECK:   transform.apply_patterns.iree.unroll_vectors_gpu_mma_sync
 // CHECK: } : !transform.any_op
-// CHECK: transform.structured.match ops{["scf.for"]} in %{{.*}} 
+// CHECK: transform.structured.match ops{["scf.for"]} in %{{.*}}
 // CHECK: transform.iree.synchronize_loop %{{.*}}
 // CHECK: transform.structured.hoist_redundant_vector_transfers %{{.*}}
 // CHECK: transform.memref.erase_dead_alloc_and_stores %{{.*}}
@@ -168,7 +168,7 @@
 // WITH_OPTIONS: apply_patterns to %{{.*}} {
 // WITH_OPTIONS:   transform.apply_patterns.iree.unroll_vectors_gpu_mma_sync
 // WITH_OPTIONS: }
-// WITH_OPTIONS: transform.structured.match ops{["scf.for"]} in %{{.*}} 
+// WITH_OPTIONS: transform.structured.match ops{["scf.for"]} in %{{.*}}
 // WITH_OPTIONS: transform.iree.synchronize_loop %{{.*}}
 // WITH_OPTIONS: transform.structured.hoist_redundant_vector_transfers %{{.*}}
 // WITH_OPTIONS: transform.memref.erase_dead_alloc_and_stores %{{.*}}
@@ -206,7 +206,7 @@
 // -----
 
 hal.executable @matmul_2 {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_2 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -260,7 +260,7 @@
 // -----
 
 hal.executable @matmul_3 {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_3 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -295,7 +295,7 @@
 
 // -----
 hal.executable @matmul_4_partially_unaligned {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_4_partially_unaligned ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -366,7 +366,7 @@
 
 // -----
 hal.executable @aligned_matmul {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @aligned_matmul ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -436,7 +436,7 @@
 // -----
 
 hal.executable @matmul_5_small {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @matmul_5_small ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -479,7 +479,7 @@
 // -----
 
 hal.executable @f16_matmul {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @f16_matmul ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -516,7 +516,7 @@
 // -----
 
 hal.executable @int8_matmul {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @int8_matmul ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir
index c43e579..c424274 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir

@@ -15,7 +15,7 @@
 // RUN: | FileCheck --check-prefix=WITH_OPTIONS %s
 
 hal.executable @pad {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @pad ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -104,7 +104,7 @@
 // -----
 
 hal.executable @pad_low {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @pad_low ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -140,7 +140,7 @@
 // -----
 
 hal.executable @pad_local {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
   hal.executable.export public @pad_local ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir
index ff75a6f..a162fca 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir

@@ -10,7 +10,7 @@
 #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>
 
 hal.executable private @reduce_dispatch_0 {
-  hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
     hal.executable.export public @reduce_dispatch_0 ordinal(0) layout(#pipeline_layout) attributes { workgroup_size = [64: index, 1: index, 1: index], subgroup_size = 32 : index }
     builtin.module {
       func.func @reduce_dispatch_0() {

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir
index ea440b1..818a7fe 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir

@@ -7,7 +7,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 #translation = #iree_codegen.translation_info<TransformDialectCodegen>
 hal.executable private @distribute {
-  hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+  hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
 // CHECK: hal.executable.export {{.*}} attributes
 // CHECK-SAME: subgroup_size = 32
 // CHECK-SAME: workgroup_size = [256 : index, 1 : index, 1 : index]
@@ -43,15 +43,15 @@
         scf.forall (%arg0) in (%c8) {
           vector.transfer_write %cst_0, %subview[%arg0]
           {in_bounds = [true]} : vector<1xf16>, memref<1xf16, strided<[1], offset: ?>>
-        } {mapping = [#gpu.warp<x>]}        
+        } {mapping = [#gpu.warp<x>]}
         return
       }
       module {
         transform.sequence failures(propagate) {
         ^bb0(%variant_op: !transform.any_op):
-        %17 = transform.structured.match ops{["func.func"]} in %variant_op 
+        %17 = transform.structured.match ops{["func.func"]} in %variant_op
           : (!transform.any_op) -> !transform.any_op
-        transform.iree.map_nested_forall_to_gpu_threads %17 
+        transform.iree.map_nested_forall_to_gpu_threads %17
           workgroup_dims = [256, 1, 1] subgroup_size = 32 : (!transform.any_op) -> ()
 
         // Late canonicalizations to cleanup and pass the checks.

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir
index acc61d8..89126da 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir

@@ -5,7 +5,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_dispatch_0_generic_4096x4096 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -64,7 +64,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_single_operand_dispatch_0_generic_768x2048 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -129,7 +129,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_3d_no_dispatch_0_generic_768x2048x1024 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_3d_no_dispatch_0_generic_768x2048x1024 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -169,7 +169,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_3d_yes_dispatch_0_generic_10x768x2048 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -234,7 +234,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -302,7 +302,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable @transpose_3d_diff_dispatch_0_generic_10x768x2048 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @transpose_3d_diff_dispatch_0_generic_10x768x2048 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/workgroup_specialization_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/workgroup_specialization_pipeline_test.mlir
index 5b5fbf1..981d874 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/workgroup_specialization_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/workgroup_specialization_pipeline_test.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))" %s | FileCheck %s
 module attributes {hal.device.targets = [#hal.device.target<"cuda", {executable_targets = [#hal.executable.target<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>]}>]} {
   hal.executable private @forward_dispatch_116 {
-    hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+    hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
       hal.executable.export public @forward_dispatch_116_matmul_128x30522x768 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
       ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -60,7 +60,7 @@
 #device_target_cuda = #hal.device.target<"cuda", {executable_targets = [#executable_target_cuda_nvptx_fb], legacy_sync}>
 module attributes {hal.device.targets = [#device_target_cuda]} {
   hal.executable private @vectorized_dispatch_0 {
-    hal.executable.variant public @cuda_nvptx_fb, target = #executable_target_cuda_nvptx_fb {
+    hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
       hal.executable.export public @vectorized_dispatch_0_generic_102401 ordinal(0) layout(#pipeline_layout) {
       ^bb0(%arg0: !hal.device, %arg1: index):
         %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
index 31583c6..5ea416a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @conv_112x112x512 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @conv_112x112x512 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_112x112x512() {
@@ -64,13 +64,13 @@
   ]>
 ]>
 hal.executable @conv_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @conv_112x112x32 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_112x112x32() {
@@ -118,13 +118,13 @@
   ]>
 ]>
 hal.executable @conv_16x16x16 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @conv_16x16x16 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_16x16x16() {
@@ -172,13 +172,13 @@
   ]>
 ]>
 hal.executable @dwconv_28x28x144 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @dwconv_28x28x144 layout(#pipeline_layout)
     builtin.module {
       func.func @dwconv_28x28x144() {
@@ -226,13 +226,13 @@
   ]>
 ]>
 hal.executable @dwconv_4x4x8 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @dwconv_4x4x8 layout(#pipeline_layout)
     builtin.module {
       func.func @dwconv_4x4x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
index 8027e66..f060fbb 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_1024x2048x512 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_1024x2048x512() {
@@ -64,13 +64,13 @@
   ]>
 ]>
 hal.executable @matmul_3136x24x96 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_3136x24x96 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_3136x24x96() {
@@ -118,13 +118,13 @@
   ]>
 ]>
 hal.executable @matmul_196x64x192 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_196x64x192 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_196x64x192() {
@@ -172,13 +172,13 @@
   ]>
 ]>
 hal.executable @matmul_12544x96x16 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_12544x96x16 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_12544x96x16() {
@@ -218,13 +218,13 @@
   ]>
 ]>
 hal.executable @matmul_49x160x576 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_49x160x576 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_49x160x576() {
@@ -270,13 +270,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x384x384 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_4x384x384 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_4x384x384() {
@@ -324,13 +324,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x8x8 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Qualcomm:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_4x8x8 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_4x8x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
index 270be68..8541ec8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir

@@ -9,13 +9,13 @@
 ]>
 
 hal.executable private @nhwc_conv_pointwise_2x64x64x320 {
-  hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @nhwc_conv_pointwise_2x64x64x320 layout(#pipeline_layout)
     builtin.module {
       func.func @nhwc_conv_pointwise_2x64x64x320() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
index 39f1a68..84315bb 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir

@@ -8,13 +8,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_f32_16x4096x40x4096 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_f32_16x4096x40x4096 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_f32_16x4096x40x4096() {
@@ -52,13 +52,13 @@
   ]>
 ]>
 hal.executable @matmul_f16_64x640x320 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float16], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_f16_64x640x320 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_f16_64x640x320() {
@@ -98,13 +98,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_f32_16x4096x40x4096 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_f32_16x4096x40x4096 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_f32_16x4096x40x4096() {
@@ -144,13 +144,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_f16_1x4096x4096x512 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_f16_1x4096x4096x512 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_f16_1x4096x4096x512() {
@@ -197,13 +197,13 @@
   ]>
 ]>
 hal.executable @matmul_multi_reduce_i4xf32xf32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @matmul_multi_reduce_i4xf32xf32 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_multi_reduce_i4xf32xf32() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
index 393d80d..9a3d937 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir

@@ -13,7 +13,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x128_div_add {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -28,7 +28,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_add layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_256x1024x128_div_add() {
@@ -93,7 +93,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x128x256x512_div {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -108,7 +108,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x128x256x512_div layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_16x128x256x512_div() {
@@ -161,7 +161,7 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x8x512x64 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -176,7 +176,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-     >}> {
+     >}>) {
     hal.executable.export @generic_batch_matmul_32x8x512x64 layout(#pipeline_layout)
     builtin.module {
       func.func @generic_batch_matmul_32x8x512x64() {
@@ -231,7 +231,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x1024x1024x80 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -246,7 +246,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x1024x1024x80 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_16x1024x1024x80() {
@@ -289,7 +289,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x8 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -304,7 +304,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x8 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_256x1024x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
index 1e6e3a7..3af4885 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir

@@ -19,13 +19,13 @@
   ]>
 ]>
 hal.executable private @nhwc_conv_pointwise_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @nhwc_conv_pointwise_112x112x32 layout(#pipeline_layout)
     builtin.module {
       func.func @nhwc_conv_pointwise_112x112x32() {
@@ -82,13 +82,13 @@
 ]>
 
 hal.executable private @nchw_conv_2x1280x8x8 {
-  hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @nchw_conv_2x1280x8x8 layout(#pipeline_layout)
     builtin.module {
       func.func @nchw_conv_2x1280x8x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
index b900dae..321eb6d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir

@@ -6,13 +6,13 @@
   ]>
 ]>
 hal.executable private @static_1d_sort {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @static_1d_sort layout(#pipeline_layout)
     builtin.module {
       func.func @static_1d_sort() {
@@ -51,13 +51,13 @@
   ]>
 ]>
 hal.executable private @static_3d_sort {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @static_3d_sort layout(#pipeline_layout)
     builtin.module {
       func.func @static_3d_sort() {
@@ -100,13 +100,13 @@
   ]>
 ]>
 hal.executable private @static_1d_fft_stage2 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirvfb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirvfb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @static_1d_fft_stage2 layout(#pipeline_layout)
     builtin.module {
       func.func @static_1d_fft_stage2() {
@@ -145,13 +145,13 @@
   ]>
 ]>
 hal.executable private @static_3d_fft_stage3 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirvfb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirvfb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @static_3d_fft_stage3 layout(#pipeline_layout)
     builtin.module {
       func.func @static_3d_fft_stage3() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
index 5b4e62f..38c6574 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir

@@ -7,13 +7,13 @@
   ]>
 ]>
 hal.executable @copy_as_generic {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @copy_as_generic layout(#pipeline_layout)
     builtin.module {
       func.func @copy_as_generic() {
@@ -51,13 +51,13 @@
   ]>
 ]>
 hal.executable @tensor_insert {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @copy layout(#pipeline_layout)
     builtin.module {
       func.func @copy() {
@@ -97,13 +97,13 @@
   ]>
 ]>
 hal.executable @avg_pool {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @avg_pool layout(#pipeline_layout)
     builtin.module {
       func.func @avg_pool() {
@@ -145,13 +145,13 @@
   ]>
 ]>
 hal.executable @avg_pool {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 4>>
-    }> {
+    }>) {
     hal.executable.export public @avg_pool layout(#pipeline_layout)
     builtin.module {
       func.func @avg_pool() {
@@ -202,13 +202,13 @@
   ]>
 ]>
 hal.executable @max_pool {
-  hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @max_pool layout(#pipeline_layout)
     builtin.module  {
       func.func @max_pool() {
@@ -256,13 +256,13 @@
 ]>
 
 hal.executable @elementwise {
-  hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @elementwise layout(#pipeline_layout)
     builtin.module {
       func.func @elementwise() {
@@ -309,13 +309,13 @@
 #map22 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
 
 hal.executable @dwconv_elementwise {
-  hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @dwconv_elementwise layout(#pipeline_layout)
     builtin.module  {
       func.func @dwconv_elementwise() {
@@ -372,13 +372,13 @@
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
 
 hal.executable @outermost_reduction {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @outermost_reduction layout(#pipeline_layout)
     builtin.module {
       func.func @outermost_reduction() {
@@ -423,13 +423,13 @@
 #map1 = affine_map<(d0, d1) -> (d0)>
 
 hal.executable private @innermost_reduction {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @innermost_reduction ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @innermost_reduction() {
@@ -480,13 +480,13 @@
   ]>
 ]>
 hal.executable @four_dim_elementwise {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @four_dim_elementwise ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -530,13 +530,13 @@
 ]>
 
 hal.executable private @odd_reduction_dimension_size_501 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @odd_reduction_dimension_size_501 ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @odd_reduction_dimension_size_501() {
@@ -589,13 +589,13 @@
 ]>
 
 hal.executable private @odd_reduction_dimension_size_2809 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @odd_reduction_dimension_size_2809 ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @odd_reduction_dimension_size_2809() {
@@ -648,13 +648,13 @@
 ]>
 
 hal.executable private @broadcast {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @broadcast ordinal(0) layout(#pipeline_layout)
     builtin.module {
       func.func @broadcast() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
index ff253ee..a7566bd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_1x3x32 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @batch_matmul_1x3x32 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_1x3x32() {
@@ -65,13 +65,13 @@
   ]>
 ]>
 hal.executable private @matmul_64x16xi8 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 64>>
-  }> {
+  }>) {
     hal.executable.export public @matmul_64x16xi8 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_64x16xi8() {
@@ -119,13 +119,13 @@
   ]>
 ]>
 hal.executable private @matmul_64x16xi64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, Int64], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 64>>
-  }> {
+  }>) {
     hal.executable.export public @matmul_64x16xi64 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_64x16xi64() {
@@ -173,13 +173,13 @@
   ]>
 ]>
 hal.executable @matmul_400x273 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @matmul_400x273 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_400x273() {
@@ -238,13 +238,13 @@
   ]>
 ]>
 hal.executable @matmul_25x546 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 64>>
-  }> {
+  }>) {
     hal.executable.export public @matmul_25x546 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_25x546() {
@@ -312,13 +312,13 @@
   ]>
 ]>
 hal.executable private @matmul_pointwise_256x1024 {
-  hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @matmul_pointwise_256x1024 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_pointwise_256x1024() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matvec.mlir
index 0774abb..16c9346 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matvec.mlir

@@ -11,13 +11,13 @@
 ]>
 
 hal.executable @i4_dequant_matvec_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f32 layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant_matvec_f32() {
@@ -77,13 +77,13 @@
 ]>
 
 hal.executable @i4_dequant_matvec_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f32 layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant_matvec_f32() {
@@ -158,13 +158,13 @@
 ]>
 
 hal.executable @i4_dequant_matvec_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f32 layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant_matvec_f32() {
@@ -227,7 +227,7 @@
         } -> tensor<4096x86x128xf32>
         %41 = linalg.generic {
             indexing_maps = [
-                affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, 
+                affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>,
                 affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>,
                 affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
@@ -265,7 +265,7 @@
   ]>
 ]>
 hal.executable @i4_dequant_matvec_f16 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<
         #spirv.vce<v1.4, [Shader, Float16, StorageBuffer16BitAccess, GroupNonUniform, GroupNonUniformShuffle], [SPV_KHR_16bit_storage]>,
         Unknown:IntegratedGPU,
@@ -274,7 +274,7 @@
           max_compute_workgroup_invocations = 1024,
           max_compute_workgroup_size = [1024, 1024, 64],
           subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f16 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -352,13 +352,13 @@
 ]>
 
 hal.executable @i4_dequant_matvec {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant_matvec() {
@@ -421,7 +421,7 @@
         } -> tensor<4096x86x128xf32>
         %41 = linalg.generic {
             indexing_maps = [
-                affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, 
+                affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>,
                 affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>,
                 affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
@@ -460,13 +460,13 @@
 ]>
 
 hal.executable @i4_dequant_matvec {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant_matvec() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
index d25c7ac..891cf7b 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir

@@ -7,13 +7,13 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @subgroup_reduce_f32 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -62,13 +62,13 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce_f16 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float16, GroupNonUniformShuffle], []>, Unknown:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 65536,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
        subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export public @subgroup_reduce_f16 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir
index d3b0882..60852a3 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir

@@ -9,13 +9,13 @@
   ]>
 ]>
 hal.executable @i4_dequant {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant layout(#pipeline_layout)
     builtin.module {
       func.func @i4_dequant() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
index 1b2fe37..32edc09 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @conv_112x112x512 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @conv_112x112x512 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_112x112x512() {
@@ -65,13 +65,13 @@
   ]>
 ]>
 hal.executable @conv_112x112x32 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @conv_112x112x32 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_112x112x32() {
@@ -119,13 +119,13 @@
   ]>
 ]>
 hal.executable @conv_16x16x16 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @conv_16x16x16 layout(#pipeline_layout)
     builtin.module {
       func.func @conv_16x16x16() {
@@ -172,13 +172,13 @@
   ]>
 ]>
 hal.executable @dwconv_28x28x144 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @dwconv_28x28x144 layout(#pipeline_layout)
     builtin.module {
       func.func @dwconv_28x28x144() {
@@ -226,13 +226,13 @@
   ]>
 ]>
 hal.executable @dwconv_1x2x8 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @dwconv_1x2x8 layout(#pipeline_layout)
     builtin.module {
       func.func @dwconv_1x2x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
index 6f9fa0b..86492a2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_1024x2048x512 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_1024x2048x512() {
@@ -62,13 +62,13 @@
   ]>
 ]>
 hal.executable @matmul_3136x24x96 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_3136x24x96 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_3136x24x96() {
@@ -115,13 +115,13 @@
   ]>
 ]>
 hal.executable @matmul_196x64x192 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_196x64x192 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_196x64x192() {
@@ -169,13 +169,13 @@
   ]>
 ]>
 hal.executable @matmul_12544x96x16 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_12544x96x16 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_12544x96x16() {
@@ -216,13 +216,13 @@
   ]>
 ]>
 hal.executable @matmul_49x160x576 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_49x160x576 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_49x160x576() {
@@ -271,13 +271,13 @@
 ]>
 
 hal.executable @matmul_2x1024x576 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_2x1024x576 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_2x1024x576() {
@@ -327,13 +327,13 @@
   ]>
 ]>
 hal.executable @matmul_1024x2048x512xi8 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @matmul_1024x2048x512xi8 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_1024x2048x512xi8() {
@@ -379,13 +379,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x384x384 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_4x384x384 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_4x384x384() {
@@ -434,13 +434,13 @@
   ]>
 ]>
 hal.executable @batch_matmul_4x2x8 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @batch_matmul_4x2x8 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_4x2x8() {
@@ -489,13 +489,13 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x2x512 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @generic_batch_matmul_32x2x512 layout(#pipeline_layout)
     builtin.module {
       func.func @generic_batch_matmul_32x2x512() {
@@ -548,13 +548,13 @@
 ]>
 
 hal.executable @generic_batch_matmul_8x2500x512x4608 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @generic_batch_matmul_8x2500x512x4608 layout(#pipeline_layout)
     builtin.module {
       func.func @generic_batch_matmul_8x2500x512x4608() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
index a7476c4..781c618 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir

@@ -8,13 +8,13 @@
   ]>
 ]>
 hal.executable @matmul_4x4096x9216 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @matmul_4x4096x9216 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_4x4096x9216() {
@@ -59,13 +59,13 @@
   ]>
 ]>
 hal.executable @matmul_1x4096x9216 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @matmul_1x4096x9216 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_1x4096x9216() {
@@ -110,13 +110,13 @@
   ]>
 ]>
 hal.executable private @multi_reduction_transposed_b_matmul {
-  hal.executable.variant public @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 49152,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @multi_reduction_transposed_b_matmul layout(#pipeline_layout)
     builtin.module {
       func.func @multi_reduction_transposed_b_matmul() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index 9d6380d..34c6525 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir

@@ -13,7 +13,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x128_div_add {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -34,7 +34,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_add layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_256x1024x128_div_add() {
@@ -99,7 +99,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x128x256x512_div {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -120,7 +120,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x128x256x512_div layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_16x128x256x512_div() {
@@ -173,7 +173,7 @@
   ]>
 ]>
 hal.executable @generic_batch_matmul_32x8x512x64 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -194,7 +194,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-     >}> {
+     >}>) {
     hal.executable.export @generic_batch_matmul_32x8x512x64 layout(#pipeline_layout)
     builtin.module {
       func.func @generic_batch_matmul_32x8x512x64() {
@@ -249,7 +249,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x1024x1024x80 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -270,7 +270,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x1024x1024x80 layout(#pipeline_layout)
     builtin.module {
       func.func @batch_matmul_16x1024x1024x80() {
@@ -313,7 +313,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x8 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -334,7 +334,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x8 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_256x1024x8() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir
index c73503d..8e262ff 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir

@@ -12,13 +12,13 @@
   ]>
 ]>
 hal.executable public @user_config {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
           max_compute_shared_memory_size = 16384,
           max_compute_workgroup_invocations = 128,
           max_compute_workgroup_size = [128, 128, 64],
           subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export public @matmul_128x1024x256 layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_128x1024x256() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
index c5154f6..32fd746 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir

@@ -8,8 +8,8 @@
   ]>
 ]>
 hal.executable private @push_constant {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @push_constant layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -49,8 +49,8 @@
   ]>
 ]>
 hal.executable private @resource_bindings_in_same_func {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @resource_bindings_in_same_func layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -110,8 +110,8 @@
   ]>
 ]>
 hal.executable private @resource_bindings_in_multi_entry_func {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @resource_bindings_in_entry_func1 layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -171,8 +171,8 @@
   ]>
 ]>
 hal.executable private @interface_binding {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @interface_binding layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -217,8 +217,8 @@
   ]>
 ]>
 hal.executable private @interface_wg_id {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @interface_wg_id layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -253,8 +253,8 @@
   ]>
 ]>
 hal.executable private @interface_wg_count {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @interface_wg_count layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
index a2b7338..edeeda2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir

@@ -10,8 +10,8 @@
   ]>
 ]>
 hal.executable private @buffer_types {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @buffer_types layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -47,8 +47,8 @@
 // -----
 
 hal.executable private @emulate_1d_vector {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export public @emulate_1d_vector ordinal(0)
       layout(#hal.pipeline.layout<push_constants = 0,
                                   sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
@@ -108,8 +108,8 @@
   ]>
 ]>
 hal.executable private @no_emulation {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, Int64], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, Int64], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @no_emulation layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
index 8f794a7..243ec0e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir

@@ -14,13 +14,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -50,13 +50,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     // expected-error @+1 {{expected workgroup size to have three dimensions for SPIR-V pipelines}}
     builtin.module {
@@ -87,13 +87,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -124,13 +124,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -161,13 +161,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -198,13 +198,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -235,13 +235,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -272,13 +272,13 @@
   ]>
 ]>
 hal.executable private @matmul_tensors {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -309,7 +309,7 @@
   ]>
 ]>
 hal.executable public @matmul_tensor {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -324,7 +324,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_tensor layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensor() {
@@ -362,7 +362,7 @@
   ]>
 ]>
 hal.executable public @matmul_tensor {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -377,7 +377,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_tensor layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensor() {
@@ -415,7 +415,7 @@
   ]>
 ]>
 hal.executable public @matmul_tensor {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -430,7 +430,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_tensor layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensor() {
@@ -468,7 +468,7 @@
   ]>
 ]>
 hal.executable public @matmul_tensor {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -483,7 +483,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_tensor layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensor() {
@@ -521,7 +521,7 @@
   ]>
 ]>
 hal.executable public @matmul_tensor {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -536,7 +536,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_tensor layout(#pipeline_layout)
     builtin.module {
       func.func @matmul_tensor() {
@@ -574,13 +574,13 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module  {
       func.func @illegal() {
@@ -640,13 +640,13 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module  {
       func.func @illegal() {
@@ -706,13 +706,13 @@
   ]>
 ]>
 hal.executable private @conv_2d_nhwc_hwcf {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module  {
       func.func @illegal() {
@@ -772,13 +772,13 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_2d_nhwc_hwc {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {
@@ -810,13 +810,13 @@
   ]>
 ]>
 hal.executable private @depthwise_conv_2d_nhwc_hwc {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 16384,
         max_compute_workgroup_invocations = 128,
         max_compute_workgroup_size = [128, 128, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @illegal layout(#pipeline_layout)
     builtin.module {
       func.func @illegal() {

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir
index 112f2d1..fe77ee1 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir

@@ -15,12 +15,12 @@
 ]>
 
 hal.executable @matmul_i4_quant_weight {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_i4_quant_weight ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir
index c8db70b..edb222a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir

@@ -17,12 +17,12 @@
 #map = affine_map<(d0, d1) -> (d0, d1)>
 
 hal.executable @matmul_f32_128x256x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f32_128x256x64 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -110,12 +110,12 @@
 #map = affine_map<(d0, d1) -> (d0, d1)>
 
 hal.executable @matmul_f32_128x256x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f32_128x256x64 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -218,12 +218,12 @@
 ]>
 
 hal.executable @matmul_f16_4096x512x512 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f16_4096x512x512 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir
index adc1abe..3f7e9d5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @i4_dequant_matvec_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniform, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f32 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
index ec0c520..6329dd8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir

@@ -10,7 +10,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 
 hal.executable @warp_reduction_dispatch {
-  hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export public @warp_reduction_dispatch ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -106,7 +106,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 
 hal.executable @warp_reduction_dispatch {
-  hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export public @warp_reduction_dispatch ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -201,7 +201,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 
 hal.executable @softmax {
-hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
   hal.executable.export public @softmax ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
index bd91c14..34ca23d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir

@@ -7,7 +7,7 @@
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
 
 hal.executable @scalar_dispatch {
-  hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export public @scalar_dispatch ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/map_memref_storage_class.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/map_memref_storage_class.mlir
index 6aa67af..2abb856 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/map_memref_storage_class.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/map_memref_storage_class.mlir

@@ -7,8 +7,8 @@
   ]>
 ]>
 hal.executable private @vulkan_client_api {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @vulkan_client_api layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }
@@ -54,8 +54,8 @@
   ]>
 ]>
 hal.executable private @opencl_client_api {
-  hal.executable.variant @opencl, target = <"opencl-spirv", "opencl-spirv-fb", {
-      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel], []>, #spirv.resource_limits<>>}> {
+  hal.executable.variant @opencl target(<"opencl-spirv", "opencl-spirv-fb", {
+      spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel], []>, #spirv.resource_limits<>>}>) {
     hal.executable.export @opencl_client_api layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index]
     }

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
index 7c95a36..e2a3635 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir

@@ -13,7 +13,7 @@
 ]>
 
 hal.executable public @matmul_256x1024x128_div_exp {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -34,7 +34,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_exp layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -217,7 +217,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x128x256x512_div {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -238,7 +238,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x128x256x512_div layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -341,7 +341,7 @@
 ]>
 
 hal.executable public @matmul_32x32x32_div {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -362,7 +362,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_32x32x32_div layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -415,7 +415,7 @@
 ]>
 
 hal.executable public @generic_batch_matmul_32x128x512x64 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -436,7 +436,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @generic_batch_matmul_32x128x512x64 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -529,7 +529,7 @@
 ]>
 
 hal.executable public @matmul_256x1024x128_div_exp {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -544,7 +544,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_exp layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -658,7 +658,7 @@
   ]>
 ]>
 hal.executable public @batch_matmul_16x128x256x512_div {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -673,7 +673,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_16x128x256x512_div layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -766,7 +766,7 @@
 ]>
 
 hal.executable public @generic_batch_matmul_32x128x512x64 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -781,7 +781,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @generic_batch_matmul_32x128x512x64 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4
@@ -870,7 +870,7 @@
     workgroup_size = [32, 4, 1], subgroup_size = 32>
 
 hal.executable public @batch_matmul_f16_16x4096x4096x64_truncf_mulf {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -885,7 +885,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_f16_16x4096x4096x64_truncf_mulf layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir
index adbf908..18bc15e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir

@@ -11,12 +11,12 @@
 #map = affine_map<(d0, d1) -> (d0, d1)>
 
 hal.executable @matmul_f32_128x256x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f32_128x256x64 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -91,12 +91,12 @@
 #map = affine_map<(d0, d1) -> (d0, d1)>
 
 hal.executable @matmul_f16_128x256x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader, Float16], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f16_128x256x64 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -178,12 +178,12 @@
   workgroup_size = [16, 8, 1]>
 
 hal.executable @matmul_f16_32x1280x1280 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader, Float16, StorageBuffer16BitAccess], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f16_32x1280x1280 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index 430a8dc..fd3a28e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir

@@ -8,13 +8,13 @@
   ]>
 ]>
 hal.executable private @fuse_and_vectorize_fill_matmul {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @fuse_and_vectorize_fill_matmul layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index, %arg3 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3
@@ -59,13 +59,13 @@
   ]>
 ]>
 hal.executable private @fuse_and_vectorize_matmul_add {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export @fuse_and_vectorize_matmul_add layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
index e5d0f80..8b88ae1 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir

@@ -10,13 +10,13 @@
   ]>
 ]>
 hal.executable @i4_dequant_unit_matmul_f16 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, Float16, StorageBuffer16BitAccess, GroupNonUniform, GroupNonUniformShuffle], [SPV_KHR_16bit_storage]>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 32>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_unit_matmul_f16 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -121,13 +121,13 @@
   ]>
 ]>
 hal.executable @i4_dequant_matvec_f16_subgroup_64 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, Float16, StorageBuffer16BitAccess, GroupNonUniform, GroupNonUniformShuffle], [SPV_KHR_16bit_storage]>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 64],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant_matvec_f16_subgroup_64 layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -135,17 +135,17 @@
     }
     builtin.module {
       func.func @i4_dequant_matvec_f16_subgroup_64() {
-        %cst = arith.constant 0.000000e+00 : f16 
-        %0 = hal.interface.constant.load[0] : i32 
-        %1 = hal.interface.constant.load[1] : i32 
-        %2 = hal.interface.constant.load[2] : i32 
-        %3 = hal.interface.constant.load[3] : i32 
-        %4 = hal.interface.constant.load[4] : i32 
-        %5 = arith.index_castui %0 : i32 to index 
-        %6 = arith.index_castui %1 : i32 to index 
-        %7 = arith.index_castui %2 : i32 to index 
-        %8 = arith.index_castui %3 : i32 to index 
-        %9 = arith.index_castui %4 : i32 to index 
+        %cst = arith.constant 0.000000e+00 : f16
+        %0 = hal.interface.constant.load[0] : i32
+        %1 = hal.interface.constant.load[1] : i32
+        %2 = hal.interface.constant.load[2] : i32
+        %3 = hal.interface.constant.load[3] : i32
+        %4 = hal.interface.constant.load[4] : i32
+        %5 = arith.index_castui %0 : i32 to index
+        %6 = arith.index_castui %1 : i32 to index
+        %7 = arith.index_castui %2 : i32 to index
+        %8 = arith.index_castui %3 : i32 to index
+        %9 = arith.index_castui %4 : i32 to index
         %10 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
         %11 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
         %12 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
@@ -159,18 +159,18 @@
         %20 = tensor.empty() : tensor<4096x86x128xf16>
         %21 = linalg.fill ins(%cst : f16) outs(%19 : tensor<4096xf16>) -> tensor<4096xf16>
         %22 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%15, %16, %17 : tensor<4096x86x128xi4>, tensor<4096x86xf16>, tensor<4096x86xf16>) outs(%20 : tensor<4096x86x128xf16>) {
-        ^bb0(%in: i4, %in_0: f16, %in_1: f16, %out: f16): 
+        ^bb0(%in: i4, %in_0: f16, %in_1: f16, %out: f16):
           %24 = arith.extui %in : i4 to i32
           %25 = arith.uitofp %24 : i32 to f16
-          %26 = arith.subf %25, %in_1 : f16 
-          %27 = arith.mulf %26, %in_0 : f16 
-          linalg.yield %27 : f16 
+          %26 = arith.subf %25, %in_1 : f16
+          %27 = arith.mulf %26, %in_0 : f16
+          linalg.yield %27 : f16
         } -> tensor<4096x86x128xf16>
         %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} ins(%18, %22 : tensor<86x128xf16>, tensor<4096x86x128xf16>) outs(%21 : tensor<4096xf16>) {
-        ^bb0(%in: f16, %in_0: f16, %out: f16): 
-          %24 = arith.mulf %in, %in_0 : f16 
-          %25 = arith.addf %24, %out : f16 
-          linalg.yield %25 : f16 
+        ^bb0(%in: f16, %in_0: f16, %out: f16):
+          %24 = arith.mulf %in, %in_0 : f16
+          %25 = arith.addf %24, %out : f16
+          linalg.yield %25 : f16
         } -> tensor<4096xf16>
         flow.dispatch.tensor.store %23, %14, offsets = [0], sizes = [4096], strides = [1] : tensor<4096xf16> -> !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
         return

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
index 753cb0f..948df1e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir

@@ -7,13 +7,13 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @subgroup_reduce ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
@@ -112,13 +112,13 @@
   ]>
 ]>
 hal.executable private @subgroup_reduce {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, ARM:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @subgroup_reduce ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
index 594e151..4d1c418 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir

@@ -9,13 +9,13 @@
   ]>
 ]>
 hal.executable @i4_dequant {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
         subgroup_size = 64>>
-    }> {
+    }>) {
     hal.executable.export @i4_dequant layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device):
       %x, %y, %z = flow.dispatch.workgroup_count_from_slice

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir
index 6dd4469..58b7ac5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir

@@ -3,7 +3,7 @@
 // RUN:   --iree-spirv-enable-transform-dialect-jit=true | FileCheck %s
 
 hal.executable @matmul {
-hal.executable.variant public @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+hal.executable.variant public @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
   spirv.target_env = #spirv.target_env<
     #spirv.vce<v1.6,
     [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -18,7 +18,7 @@
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [2147483647, 65535, 65535],
       subgroup_size = 32>
-     >}> {
+     >}>) {
   hal.executable.export public @matmul ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
   ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
     %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy_from_file.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy_from_file.mlir
index 5c2fb35..bf6487a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy_from_file.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy_from_file.mlir

@@ -8,13 +8,13 @@
   ]>
 ]>
 hal.executable private @copy_f32 {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb", {
       spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Shader, GroupNonUniformShuffle], []>, Unknown:IntegratedGPU, #spirv.resource_limits<
         max_compute_shared_memory_size = 32768,
         max_compute_workgroup_invocations = 512,
         max_compute_workgroup_size = [512, 512, 512],
        subgroup_size = 16>>
-    }> {
+    }>) {
     hal.executable.export public @copy_f32 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir
index 099203d..af22e74 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir

@@ -18,7 +18,7 @@
   ]>
 ]>
 hal.executable private @matmul {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @matmul layout(#pipeline_layout) attributes {
       workgroup_size = [16: index, 8: index, 1: index],
       translation_info = #translation
@@ -88,7 +88,7 @@
   ]>
 ]>
 hal.executable private @conv_1d {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @conv_1d layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 4: index, 1: index],
       translation_info = #translation
@@ -168,7 +168,7 @@
   ]>
 ]>
 hal.executable private @conv_2d {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @conv_2d layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 4: index, 1: index],
       translation_info = #translation
@@ -283,7 +283,7 @@
   ]>
 ]>
 hal.executable private @conv_3d {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @conv_3d layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 4: index, 1: index],
       translation_info = #translation
@@ -354,7 +354,7 @@
 ]>
 module  {
   hal.executable private @pooling_nhwc_max {
-    hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
       hal.executable.export @pooling_nhwc_max layout(#pipeline_layout) attributes {
         workgroup_size = [32: index, 4: index, 1: index],
         translation_info = #translation
@@ -421,7 +421,7 @@
 ]>
 
 hal.executable @matvec {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export public @matvec ordinal(0) layout(#pipeline_layout) attributes {
       workgroup_size = [32: index, 1: index, 1: index],
       translation_info = #translation

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
index 7e402aa..ce914a6 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir

@@ -10,7 +10,7 @@
   ]>
 ]>
 hal.executable private @static_scatter_update_slice  {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan", "vulkan-spirv-fb">) {
     hal.executable.export @static_scatter_update_slice layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [16 : index, 1 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
index 81f70a0..734de4d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir

@@ -9,7 +9,7 @@
   ]>
 ]>
 hal.executable private @static_3d_sort  {
-  hal.executable.variant @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @static_3d_sort layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [16 : index, 1 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir
index 204f33e..a5a3190 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir

@@ -19,7 +19,7 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 32], [16, 16, 16], [0, 0, 32]]>
 
 hal.executable @matmul_f16_32x32x32 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -40,7 +40,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_f16_32x32x32 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize>,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -124,7 +124,7 @@
 ]>
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
 hal.executable @generic_batch_matmul_f16_32x128x512x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -145,7 +145,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @generic_batch_matmul_f16_32x128x512x64 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -274,7 +274,7 @@
 ]>
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
 hal.executable @generic_batch_matmul_f16_32x128x512x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -295,7 +295,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @generic_batch_matmul_f16_32x128x512x64 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -393,7 +393,7 @@
 ]>
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
 hal.executable @generic_batch_matmul_f16_32x128x512x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -414,7 +414,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @generic_batch_matmul_f16_32x128x512x64 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [64 : index, 2 : index, 1 : index]
@@ -511,7 +511,7 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 128], [1, 32, 64], [0, 0, 0, 32], [1, 16, 16, 16]]>
 
 hal.executable @batch_matmul_f16_1x64x128x512 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -526,7 +526,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @batch_matmul_f16_1x64x128x512 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize>,
       workgroup_size = [128 : index, 2 : index, 1 : index]
@@ -619,7 +619,7 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
 
 hal.executable @matmul_f16_f512x4096x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -634,7 +634,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_f16_f512x4096x64 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize>,
       workgroup_size = [128 : index, 2 : index, 1 : index]
@@ -736,7 +736,7 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
 
 hal.executable @matmul_f16_f512x4096x64 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -751,7 +751,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_f16_f512x4096x64 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize>,
       workgroup_size = [128 : index, 2 : index, 1 : index]
@@ -853,7 +853,7 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
 
 hal.executable @matmul_f16_128x262144x2304 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -868,7 +868,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [1024, 1024, 1024],
         subgroup_size = 64>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_f16_128x262144x2304 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize>,
       workgroup_size = [128 : index, 2 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir
index 2385e58..0cc2d76 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir

@@ -11,12 +11,12 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[128, 128], [16, 4], [0, 0, 32]]>
 
 hal.executable @matmul_f32_256x1024x128 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.5, [Shader], []>, NVIDIA:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 49152,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [65535, 65535, 65535],
-      subgroup_size = 32>>}> {
+      subgroup_size = 32>>}>) {
     hal.executable.export public @matmul_f32_256x1024x128 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [32 : index, 8 : index, 1 : index]
@@ -141,12 +141,12 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 256], [1, 8, 8], [0, 0, 0, 16]]>
 
 hal.executable @batch_matmul_16x1024x1024x80 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float16], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 65536,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [1024, 1024, 1024],
-      subgroup_size = 64>>}> {
+      subgroup_size = 64>>}>) {
     hal.executable.export public @batch_matmul_16x1024x1024x80 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [32 : index, 8 : index, 1 : index]
@@ -224,12 +224,12 @@
 #config = #iree_codegen.lowering_config<tile_sizes = [[1, 512, 8], [1, 8, 4], [0, 0, 0, 16]]>
 
 hal.executable @batch_matmul_f32_16x4096x40x4096 {
-  hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader], []>, AMD:DiscreteGPU, #spirv.resource_limits<
       max_compute_shared_memory_size = 65536,
       max_compute_workgroup_invocations = 1024,
       max_compute_workgroup_size = [1024, 1024, 1024],
-      subgroup_size = 64>>}> {
+      subgroup_size = 64>>}>) {
     hal.executable.export public @batch_matmul_f32_16x4096x40x4096 ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<SPIRVMatmulPromoteVectorize>,
       workgroup_size = [2 : index, 64 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index 76efc45..1c31b88 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir

@@ -12,7 +12,7 @@
   ]>
 ]>
 hal.executable private @fused_fill_batch_matmul {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @fused_fill_batch_matmul layout(#pipeline_layout) attributes {
       workgroup_size = [16: index, 1: index, 1: index],
       translation_info = #translation

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
index dcf863b..8d4e1d5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir

@@ -12,7 +12,7 @@
   ]>
 ]>
 hal.executable private @nhwc_conv_static_shape_f32 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @nhwc_conv_static_shape_f32 layout(#pipeline_layout) attributes {
       workgroup_size = [4: index, 4: index, 1: index],
       translation_info = #translation
@@ -86,7 +86,7 @@
   ]>
 ]>
 hal.executable private @nhwc_nhwc_depthwise_conv_static_shape_f32 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @nhwc_nhwc_depthwise_conv_static_shape_f32 layout(#pipeline_layout) attributes {
       workgroup_size = [4: index, 4: index, 4: index],
       translation_info = #translation
@@ -158,7 +158,7 @@
 ]>
 
 hal.executable private @low_padded_conv {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @low_padded_conv layout(#pipeline_layout) attributes {
       workgroup_size = [8: index, 2: index, 1: index],
       translation_info = #translation
@@ -276,7 +276,7 @@
 ]>
 
 hal.executable private @low_high_padded_nhwc_depthwise_conv {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @low_high_padded_nhwc_depthwise_conv layout(#pipeline_layout) attributes {
       workgroup_size = [8: index, 2: index, 1: index],
       translation_info = #translation
@@ -397,7 +397,7 @@
 ]>
 
 hal.executable private @nchw_conv_static_shape_f32 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @nchw_conv_static_shape_f32 layout(#pipeline_layout) attributes {
       workgroup_size = [4: index, 4: index, 1: index],
       translation_info = #translation
@@ -472,7 +472,7 @@
 ]>
 
 hal.executable private @nhwc_conv_static_shape_f16_batch2 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @nhwc_conv_static_shape_f16_batch2 layout(#pipeline_layout) attributes {
       workgroup_size = [8: index, 8: index, 1: index],
       translation_info = #translation

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index e82a5c3..e379846 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir

@@ -11,7 +11,7 @@
   ]>
 ]>
 hal.executable private @matmul_static_shape_f16 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @matmul_static_shape_f16 layout(#pipeline_layout) attributes {
       workgroup_size = [16: index, 1: index, 1: index],
       translation_info = #translation
@@ -73,7 +73,7 @@
   ]>
 ]>
 hal.executable private @matmul_static_shape_f32 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @matmul_static_shape_f32 layout(#pipeline_layout) attributes {
       workgroup_size = [16: index, 1: index, 1: index],
       translation_info = #translation

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir
index 6587c3a..00b27b0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir

@@ -12,7 +12,7 @@
 ]>
 
 hal.executable private @pooling_nhwc_sum_f32 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb"> {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
     hal.executable.export @pooling_nhwc_sum_f32 layout(#pipeline_layout) attributes {
       workgroup_size = [2: index, 2: index, 2: index],
       translation_info = #translation

diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index e62e05a..d17e9d8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir

@@ -14,7 +14,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x128_div_add {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -35,7 +35,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_add layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [32 : index, 1 : index, 1 : index]
@@ -183,7 +183,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x128_div_add {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -204,7 +204,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_div_add layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [32 : index, 1 : index, 1 : index]
@@ -358,7 +358,7 @@
   ]>
 ]>
 hal.executable public @matmul_256x1024x128_mixed_signedness_int8 {
-  hal.executable.variant @vulkan, target = <"vulkan-spirv", "vulkan-spirv-fb", {
+  hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb", {
     spirv.target_env = #spirv.target_env<
       #spirv.vce<v1.6,
       [Shader, Float16, StorageBuffer16BitAccess, StorageUniform16, CooperativeMatrixKHR],
@@ -379,7 +379,7 @@
         max_compute_workgroup_invocations = 1024,
         max_compute_workgroup_size = [2147483647, 65535, 65535],
         subgroup_size = 32>
-       >}> {
+       >}>) {
     hal.executable.export public @matmul_256x1024x128_mixed_signedness_int8 layout(#pipeline_layout) attributes {
       translation_info = #translation,
       workgroup_size = [32 : index, 1 : index, 1 : index]

diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/assign_constant_ordinals.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/assign_constant_ordinals.mlir
index 9b0bf3f..73e0c00 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/assign_constant_ordinals.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/assign_constant_ordinals.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-vmvx-assign-constant-ordinals)))" --split-input-file %s | FileCheck %s
 
 hal.executable private @executable {
-  hal.executable.variant public @variant, target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant public @variant target(#hal.executable.target<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.constant.block(%device: !hal.device) -> i32 as "foo" {
       %c0 = arith.constant 0 : i32
       hal.return %c0 : i32

diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir
index a97fbaa..87e066b 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir

@@ -9,7 +9,7 @@
 ]>
 
 hal.executable private @dispatch_0 {
-  hal.executable.variant @vmvx, target = #vmvx_target {
+  hal.executable.variant @vmvx target(#vmvx_target) {
     hal.executable.constant.block(%device: !hal.device) -> i32 as "foo" {
       %c1 = arith.constant 1 : i32
       hal.return %c1 : i32
@@ -30,7 +30,7 @@
   }
 }
 hal.executable private @dispatch_1 {
-  hal.executable.variant @vmvx, target = #vmvx_target {
+  hal.executable.variant @vmvx target(#vmvx_target) {
     hal.executable.constant.block(%device: !hal.device) -> i32 as "baz" {
       %c2 = arith.constant 2 : i32
       hal.return %c2 : i32
@@ -51,7 +51,7 @@
   }
 }
 hal.executable private @dispatch_2 {
-  hal.executable.variant @vmvx, target = #vmvx_target {
+  hal.executable.variant @vmvx target(#vmvx_target) {
     hal.executable.export @dispatch_2 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -100,7 +100,7 @@
 // CHECK-NOT: hal.executable private @dispatch_1
 // CHECK-NOT: hal.executable private @dispatch_2
 // CHECK:       hal.executable private @link_executables_linked_vmvx {
-// CHECK-NEXT:    hal.executable.variant public @vmvx_bytecode_fb, target = #executable_target_vmvx_bytecode_fb {
+// CHECK-NEXT:    hal.executable.variant public @vmvx_bytecode_fb target(#executable_target_vmvx_bytecode_fb) {
 // CHECK-NEXT:      hal.executable.constant.block(%arg0: !hal.device) -> i32 as "foo"
 // CHECK-NEXT:        = arith.constant 1
 //      CHECK:      hal.executable.export public @dispatch_0 ordinal(0)
@@ -154,7 +154,7 @@
 ]>
 
 hal.executable private @dispatch_0 {
-  hal.executable.variant @vmvx, target = #vmvx_target {
+  hal.executable.variant @vmvx target(#vmvx_target) {
     hal.executable.export @dispatch_0 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -182,7 +182,7 @@
   }
 }
 hal.executable private @dispatch_1 {
-  hal.executable.variant @vmvx, target = #vmvx_target {
+  hal.executable.variant @vmvx target(#vmvx_target) {
     hal.executable.export @dispatch_1 ordinal(0) layout(#pipeline_layout) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -220,7 +220,7 @@
 // CHECK-NOT: hal.executable private @dispatch_0
 // CHECK-NOT: hal.executable private @dispatch_1
 // CHECK:       hal.executable private @link_executables_linked_vmvx {
-// CHECK:       hal.executable.variant public @vmvx_bytecode_fb, target = #executable_target_vmvx_bytecode_fb {
+// CHECK:       hal.executable.variant public @vmvx_bytecode_fb target(#executable_target_vmvx_bytecode_fb) {
 // CHECK:           module {
 // CHECK-NEXT:        vm.module public @linked_module {
 // CHECK-NEXT:          vm.rodata public @rodata_a dense<0> : tensor<1xi32>

diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir
index cb252eb..52f28bf 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt  --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))" --split-input-file %s | FileCheck %s
 
 hal.executable private @mmt4d_ukernel {
-  hal.executable.variant public @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb", {ukernels = true}> {
+  hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb", {ukernels = true}>) {
     hal.executable.export public @mmt4d_i8 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
       %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
index 9d4ce46..4cb30f5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir

@@ -6,7 +6,7 @@
   ]>
 ]>
 hal.executable private @ex {
-  hal.executable.variant public @variant, target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64"> {
+  hal.executable.variant public @variant target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) {
     ^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index):
       %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
index fe037b0..23eac6d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir

@@ -164,9 +164,9 @@
 
 // -----
 
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
+#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
 #device_target_cpu = #hal.device.target<"llvm-cpu", {
-  executable_targets = [#executable_target_embedded_elf_x86_64_]
+  executable_targets = [#executable_target_embedded_elf_x86_64]
 }>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -177,7 +177,7 @@
   ]>
 ]>
 hal.executable private @ex {
-  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+  hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
     hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
       translation_info = #iree_codegen.translation_info<CPUDefault>
     } {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
index e2f795e..fc27fe4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td

@@ -2128,8 +2128,8 @@
   let assemblyFormat = [{
     custom<SymbolVisibility>($sym_visibility)
     $sym_name
-    `,` `target` `=` $target
-    (`,` `objects` `=` $objects^ )?
+    `target` `(` $target `)`
+    (`objects` `(` $objects^ `)` )?
     attr-dict-with-keyword
     regions
   }];

diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
index 63f7687..66026aa 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir

@@ -191,7 +191,7 @@
 // -----
 
 hal.executable @ex {
-  hal.executable.variant @backend, target = <"backend", "format"> {
+  hal.executable.variant @backend target(<"backend", "format">) {
     hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
       #hal.descriptor_set.layout<0, bindings = [
         #hal.descriptor_set.binding<0, storage_buffer>,
@@ -221,7 +221,7 @@
 // -----
 
 hal.executable @ex {
-  hal.executable.variant @backend, target = <"backend", "format"> {
+  hal.executable.variant @backend target(<"backend", "format">) {
     hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
       #hal.descriptor_set.layout<0, bindings = [
         #hal.descriptor_set.binding<0, storage_buffer>,

diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_folding.mlir
index 360453e..dc6004e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_folding.mlir

@@ -4,7 +4,7 @@
 
 // CHECK-LABEL: @multiple_constant_blocks
 hal.executable @multiple_constant_blocks {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK: hal.executable.constant.block() -> (i32, i32, i32) as ("foo", "bar", "baz")
     hal.executable.constant.block() -> (i32, i32) as ("foo", "bar") {
       // CHECK-DAG: %[[FOO:.+]] = arith.constant 0
@@ -32,7 +32,7 @@
 
 // CHECK-LABEL: @complex_constant_blocks
 hal.executable @complex_constant_blocks {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK: hal.executable.constant.block(%[[DEVICE:.+]]: !hal.device) -> (i32, i32, i32) as ("foo", "bar", "baz")
     hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as ("foo", "bar") {
       // CHECK-DAG: %[[DUMMY:.+]] = arith.constant 0
@@ -75,7 +75,7 @@
 
 // CHECK-LABEL: @unused_device_arg
 hal.executable @unused_device_arg {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK: hal.executable.constant.block() -> i32 as "foo"
     hal.executable.constant.block(%device: !hal.device) -> i32 as "foo" {
       %c0 = arith.constant 0 : i32
@@ -91,7 +91,7 @@
 
 // CHECK-LABEL: @duplicate_keys
 hal.executable @duplicate_keys {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK: hal.executable.constant.block() -> (i32, i32) as ("foo", "bar")
     hal.executable.constant.block() -> (i32, i32, i32) as ("foo", "bar", "foo") {
       // CHECK-DAG: %[[FOO:.+]] = arith.constant 1000
@@ -114,7 +114,7 @@
 
 // CHECK-LABEL: @multiple_blocks_duplicate_keys
 hal.executable @multiple_blocks_duplicate_keys {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK:  hal.executable.constant.block() -> (i32, i32) as ("foo", "bar")
     hal.executable.constant.block() -> (i32, i32) as ("foo", "bar") {
       // CHECK-DAG: %[[FOO:.+]] = arith.constant 0

diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
index 31da799..a2590db 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir

@@ -3,8 +3,13 @@
 #executable_target_format = #hal.executable.target<"backend", "format">
 // CHECK-LABEL: @ex
 hal.executable @ex {
-  // CHECK: hal.executable.variant public @backend, target = #executable_target_format
-  hal.executable.variant @backend, target = #executable_target_format {
+  // CHECK: hal.executable.variant public @backend
+  // CHECK-SAME: target(#executable_target_format)
+  // CHECK-SAME: objects([#hal.executable.object<{path = "foo.bin"}>, #hal.executable.object<{path = "bar.bin"}>])
+  hal.executable.variant @backend target(#executable_target_format) objects([
+    #hal.executable.object<{path = "foo.bin"}>,
+    #hal.executable.object<{path = "bar.bin"}>
+  ]) {
     // CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes {
     // CHECK-SAME:     workgroup_size = [4 : index, 1 : index, 1 : index]
     hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
@@ -31,8 +36,8 @@
 
 // CHECK-LABEL: @ex_with_workgroup_count_region
 hal.executable @ex_with_workgroup_count_region {
-  // CHECK: hal.executable.variant public @backend, target = #executable_target_format
-  hal.executable.variant @backend, target = #executable_target_format {
+  // CHECK: hal.executable.variant public @backend target(#executable_target_format)
+  hal.executable.variant @backend target(#executable_target_format) {
     // CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes {
     // CHECK-SAME:     subgroup_size = 64 : index
     // CHECK-SAME:     workgroup_size = [4 : index, 1 : index, 1 : index]
@@ -65,7 +70,7 @@
 // CHECK-LABEL: @ex_with_constants
 hal.executable @ex_with_constants {
   // CHECK: hal.executable.variant public @backend
-  hal.executable.variant @backend, target = #executable_target_format {
+  hal.executable.variant @backend target(#executable_target_format) {
     // CHECK: hal.executable.constant.block(%{{.+}}: !hal.device) -> (i32, i32) as ("foo", "bar")
     hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as ("foo", "bar") {
       %c0 = arith.constant 0 : i32
@@ -134,7 +139,7 @@
 // CHECK-LABEL: @unresolved_workload_ex
 hal.executable @unresolved_workload_ex {
   // CHECK: hal.executable.variant public @backend
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // CHECK: hal.executable.export public @entry0
     hal.executable.export public @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
       #hal.descriptor_set.layout<0, bindings = [

diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir
index 1184a0b..742128a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir

@@ -20,7 +20,7 @@
 // -----
 
 hal.executable @ex_with_constants {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // expected-error @+1 {{must have one key for every result}}
     hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as ("foo") {
       %c0 = arith.constant 0 : i32
@@ -33,7 +33,7 @@
 // -----
 
 hal.executable @ex_with_constants {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     hal.executable.constant.block(%device: !hal.device) -> (i32, i32) as ("foo", "bar") {
       %c0 = arith.constant 0 : i32
       // expected-error @+1 {{return must have the same number of operands}}
@@ -45,7 +45,7 @@
 // -----
 
 hal.executable @ex_with_constants {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     hal.executable.constant.block(%device: !hal.device) -> i32 as "foo" {
       %c0 = arith.constant 0.0 : f32
       // expected-error @+1 {{parent expected result 0 to be 'i32' but returning 'f32'}}
@@ -57,7 +57,7 @@
 // -----
 
 hal.executable @ex_with_constants {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // expected-error @+1 {{initializer must take a !hal.device or nothing}}
     hal.executable.constant.block(%device: !hal.device, %invalid: i32) -> i32 as "foo" {
       %c0 = arith.constant 0 : i32
@@ -69,7 +69,7 @@
 // -----
 
 hal.executable @ex_with_constants {
-  hal.executable.variant @backend, target = #hal.executable.target<"backend", "format"> {
+  hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
     // expected-error @+1 {{initializer must return only i32 values}}
     hal.executable.constant.block() -> f32 as "foo" {
       %c0 = arith.constant 0.0 : f32

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/test/smoketest.mlir b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/test/smoketest.mlir
index 97b1cf3..86e0bb4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/test/smoketest.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/test/smoketest.mlir

@@ -38,7 +38,7 @@
 }
 
 // CHECK-LABEL: hal.executable public @add_dispatch_0
-//  CHECK-NEXT:   hal.executable.variant public @vmvx_bytecode_fb, target = <"vmvx", "vmvx-bytecode-fb"> {
+//  CHECK-NEXT:   hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
 //  CHECK-NEXT:     hal.executable.export public @add_dispatch_0 ordinal(0)
 //  CHECK-SAME:       layout(#hal.pipeline.layout<push_constants = 0, sets = [
 //  CHECK-SAME:         <0, bindings = [

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/test/linking.mlir b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/test/linking.mlir
index 11ff85e..180b901 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/test/linking.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/test/linking.mlir

@@ -21,7 +21,7 @@
 ]>
 
 hal.executable private @call_dispatch_0  {
-  hal.executable.variant @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export @call_dispatch_0 ordinal(0) layout(#pipeline_layout_0) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -39,7 +39,7 @@
   }
 }
 hal.executable private @call_dispatch_1  {
-  hal.executable.variant @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export @call_dispatch_1 ordinal(0) layout(#pipeline_layout_1) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -57,7 +57,7 @@
   }
 }
 hal.executable private @call_dispatch_2  {
-  hal.executable.variant @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export @call_dispatch_2 ordinal(0) layout(#pipeline_layout_0) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -75,7 +75,7 @@
   }
 }
 hal.executable private @call_dispatch_3  {
-  hal.executable.variant @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export @call_dispatch_3 ordinal(0) layout(#pipeline_layout_1) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -93,7 +93,7 @@
   }
 }
 hal.executable private @call_dispatch_4  {
-  hal.executable.variant @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+  hal.executable.variant @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
     hal.executable.export @call_dispatch_4 ordinal(0) layout(#pipeline_layout_1) {
     ^bb0(%arg0: !hal.device) :
       %c1 = arith.constant 1 : index
@@ -114,7 +114,7 @@
 // Two groups should be created, according to their interfaces.
 
 //      CHECK: hal.executable private @linking_linked_vulkan_0 {
-// CHECK-NEXT:   hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+// CHECK-NEXT:   hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
 // CHECK-NEXT:     hal.executable.export public @call_dispatch_1 ordinal(0) layout(#pipeline_layout_0)
 // CHECK-NEXT:     hal.executable.export public @call_dispatch_3 ordinal(1) layout(#pipeline_layout_0)
 // CHECK-NEXT:     hal.executable.export public @call_dispatch_4 ordinal(2) layout(#pipeline_layout_0)
@@ -141,7 +141,7 @@
 // CHECK-NEXT: }
 
 //      CHECK: hal.executable private @linking_linked_vulkan {
-// CHECK-NEXT:   hal.executable.variant public @vulkan_spirv_fb, target = #executable_target_vulkan_spirv_fb {
+// CHECK-NEXT:   hal.executable.variant public @vulkan_spirv_fb target(#executable_target_vulkan_spirv_fb) {
 // CHECK-NEXT:     hal.executable.export public @call_dispatch_0 ordinal(0) layout(#pipeline_layout_1)
 // CHECK-NEXT:     hal.executable.export public @call_dispatch_2 ordinal(1) layout(#pipeline_layout_1)
 // CHECK-NEXT:     module  {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
index d45978d..84e7e27 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir

@@ -2,10 +2,10 @@
 
 // Tests an end-to-end simple single-dispatch `dispatch(arg0, arg1) -> result`.
 
-#executable_target_embedded_elf_aarch64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64">
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
+#executable_target_embedded_elf_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64">
+#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
 #device_target_cpu = #hal.device.target<"llvm-cpu", {
-  executable_targets = [#executable_target_embedded_elf_x86_64_]
+  executable_targets = [#executable_target_embedded_elf_x86_64]
 }>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -20,7 +20,7 @@
 
   // CHECK: hal.executable private @ex
   hal.executable private @ex {
-    hal.executable.variant public @embedded_elf_aarch64, target = #executable_target_embedded_elf_aarch64_ {
+    hal.executable.variant public @embedded_elf_aarch64 target(#executable_target_embedded_elf_aarch64) {
       hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
         translation_info = #iree_codegen.translation_info<CPUDefault>
       } {
@@ -33,7 +33,7 @@
         // Opaque at this point (in some target-specific dialects).
       }
     }
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
       hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) attributes {
         translation_info = #iree_codegen.translation_info<CPUDefault>
       } {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
index fb1d328..cdac0d8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir

@@ -3,9 +3,9 @@
 // Tests dumping executable benchmarks to stdout - it's more common to use files
 // but this is much easier to test with lit.
 
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
+#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
 #device_target_cpu = #hal.device.target<"llvm-cpu", {
-  executable_targets = [#executable_target_embedded_elf_x86_64_]
+  executable_targets = [#executable_target_embedded_elf_x86_64]
 }>
 #pipeline_layout_0 = #hal.pipeline.layout<push_constants = 2, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -26,7 +26,7 @@
   // Executable should be dumped:
   // CHECK: hal.executable private @ex0
   hal.executable private @ex0 {
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
       hal.executable.export public @dispatch0 ordinal(0) layout(#pipeline_layout_0) attributes {
         translation_info = #iree_codegen.translation_info<CPUDefault>
       } {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir
index c1bae6e..6c9abfc 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir

@@ -3,9 +3,9 @@
 // Tests dumping executable sources to stdout - it's more common to use files
 // but this is much easier to test with lit.
 
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
+#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
 #device_target_cpu = #hal.device.target<"llvm-cpu", {
-  executable_targets = [#executable_target_embedded_elf_x86_64_]
+  executable_targets = [#executable_target_embedded_elf_x86_64]
 }>
 #pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
   #hal.descriptor_set.layout<0, bindings = [
@@ -20,8 +20,8 @@
   // CHECK: hal.executable public @ex0
   hal.executable private @ex0 {
     // We expect local outputs with attributes inlined:
-    // CHECK-NEXT: hal.executable.variant {{.+}}, target = <"llvm-cpu"
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    // CHECK-NEXT: hal.executable.variant {{.+}} target(<"llvm-cpu"
+    hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
       hal.executable.export public @dispatch0 ordinal(0) layout(#pipeline_layout) attributes {
         translation_info = #iree_codegen.translation_info<CPUDefault>
       } {
@@ -40,7 +40,7 @@
 
   // CHECK: hal.executable private @ex1
   hal.executable private @ex1 {
-    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
       hal.executable.export public @dispatch1 ordinal(0) layout(#pipeline_layout) attributes {
         translation_info = #iree_codegen.translation_info<CPUDefault>
       } {

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
index fdc093a..0d02221 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir

@@ -20,7 +20,7 @@
   // CHECK-SAME:     <2, storage_buffer>
 
   // CHECK: hal.executable private @ex_workgroups
-  // CHECK:   hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64
+  // CHECK:   hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64
   // CHECK:     hal.executable.export public @entry ordinal(0) layout(#pipeline_layout) {
   // CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index):
   // CHECK-NEXT:   hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index
@@ -28,7 +28,7 @@
   // CHECK:     builtin.module
   // CHECK-NEXT:  func.func private @extern_func()
   // CHECK-NEXT:  func.func @entry
-  // CHECK:   hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64
+  // CHECK:   hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64
   // CHECK:     hal.executable.export public @entry ordinal(0) layout(#pipeline_layout) {
   // CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index):
   // CHECK-NEXT:   hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index
@@ -113,11 +113,11 @@
 }
 
 // CHECK: hal.executable public @ex
-// CHECK:   hal.executable.variant public @embedded_elf_arm_64, target = #executable_target_embedded_elf_arm_64
+// CHECK:   hal.executable.variant public @embedded_elf_arm_64 target(#executable_target_embedded_elf_arm_64
 // CHECK:     hal.executable.export public @entry layout(#pipeline_layout)
 // CHECK:     builtin.module
 // CHECK-NEXT:  func.func @entry()
-// CHECK:   hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64
+// CHECK:   hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64
 // CHECK:     hal.executable.export public @entry layout(#pipeline_layout)
 // CHECK:     builtin.module
 // CHECK-NEXT:  func.func @entry()

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
index dba9d91..b129edf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir

@@ -121,7 +121,7 @@
 // TODO(scotttodd): Test without depending on a specific HAL target? Or move to HAL/Target/*/test/?
 //   - If there is no matching hal.executable.variant then the executable will not be cached
 hal.executable @exe {
-  hal.executable.variant @vmvx, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @entry0 ordinal(0) layout(#pipeline_layout_0) attributes {
       workgroup_size = [32 : index, 1 : index, 1 : index]
     }
@@ -262,7 +262,7 @@
 }
 
 hal.executable @exe {
-  hal.executable.variant @vmvx, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @vmvx target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @entry ordinal(0) layout(#pipeline_layout_0) attributes {
       workgroup_size = [32 : index, 1 : index, 1 : index]
     }

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir
index 9783abb..670a326 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir

@@ -20,7 +20,7 @@
 // CHECK: hal.executable private @executable_a
 hal.executable private @executable_a {
   // CHECK: hal.executable.variant public @variant_a
-  hal.executable.variant public @variant_a, target = #hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 123 : i64}> {
+  hal.executable.variant public @variant_a target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 123 : i64}>) {
     hal.executable.export public @dispatch_a ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %c1 = arith.constant 1 : index
@@ -36,7 +36,7 @@
     }
   }
   // CHECK: hal.executable.variant public @variant_unmodified
-  hal.executable.variant public @variant_unmodified, target = #hal.executable.target<"cuda", "cuda-nvptx-fb", {}> {
+  hal.executable.variant public @variant_unmodified target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {}>) {
     hal.executable.export public @dispatch_unmodified ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index):
       %c1 = arith.constant 1 : index
@@ -56,7 +56,7 @@
 // CHECK: hal.executable private @executable_b
 hal.executable private @executable_b {
   // CHECK: hal.executable.variant public @variant_b
-  hal.executable.variant public @variant_b, target = #hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 456 : i64}> {
+  hal.executable.variant public @variant_b target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 456 : i64}>) {
     hal.executable.export public @dispatch_b ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device):
       %c1 = arith.constant 1 : index

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
index 88359a0..22d6f35 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir

@@ -1,7 +1,7 @@
 // RUN: iree-opt --split-input-file --iree-hal-resolve-export-ordinals %s | FileCheck %s
 
 hal.executable @exe {
-  hal.executable.variant @target, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @entry ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
       #hal.descriptor_set.layout<0, bindings = [
         #hal.descriptor_set.binding<0, storage_buffer>,
@@ -56,7 +56,7 @@
 // -----
 
 hal.executable @exe {
-  hal.executable.variant @target, target = <"vmvx", "vmvx-bytecode-fb"> {
+  hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) {
     hal.executable.export @entry ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
       #hal.descriptor_set.layout<0, bindings = [
         #hal.descriptor_set.binding<0, storage_buffer>,

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir
index a2d3ffd..ba2baad 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir

@@ -6,7 +6,7 @@
 // This entire executable should be replaced including the export.
 // CHECK: hal.executable private @executable0
 hal.executable private @executable0 {
-  hal.executable.variant public @variant, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       // CHECK: arith.constant 123
@@ -33,7 +33,7 @@
   // CHECK-SAME: #hal.executable.object<{
   // CHECK-SAME:   path = "substitute_executables_replacement.obj",
   // CHECK-SAME:   data = dense<[72, 69, 76, 76, 79, 33,
-  hal.executable.variant public @variant, target = <"cuda", "cuda-nvptx-fb"> {
+  hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) {
     hal.executable.export public @dispatch1 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       // CHECK: arith.constant 100 : index

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir
index 407369e..4158e02 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir

@@ -1,6 +1,6 @@
 // Replacement executable for substitute_executables.mlir.
 hal.executable private @executable0 {
-  hal.executable.variant public @variant, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+  hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
     hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
     ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
       %c123 = arith.constant 123 : index

diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
index db6f753..6d90583 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir

@@ -49,6 +49,6 @@
   // CHECK-NEXT: hal.executable private @exe
   hal.executable private @exe {
     // CHECK-NEXT: hal.executable.variant public @embedded_elf_arm_64
-    hal.executable.variant public @embedded_elf_arm_64, target = #executable_target {}
+    hal.executable.variant public @embedded_elf_arm_64 target(#executable_target) {}
   }
 }

diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
index f8820d6..e6dfb34 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir

@@ -6,7 +6,7 @@
 
 // CHECK-NOT: hal.executable
 hal.executable private @ex {
-  hal.executable.variant public @vmvx_ir, target = <"vmvx-inline", "vmvx-ir"> {
+  hal.executable.variant public @vmvx_ir target(<"vmvx-inline", "vmvx-ir">) {
     hal.executable.export public @dispatch_0 ordinal(0) layout(
          #hal.pipeline.layout<push_constants = 2,
                                 sets = [

diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
index 30d5fa4..7e5471d 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir

@@ -12,7 +12,7 @@
   ]>
 ]>
 hal.executable private @ex {
-  hal.executable.variant public @variant, target = #hal.executable.target<"llvm", "embedded-elf-x86_64"> {
+  hal.executable.variant public @variant target(#hal.executable.target<"llvm", "embedded-elf-x86_64">) {
     hal.executable.export public @dispatch ordinal(16) layout(#pipeline_layout) {
     ^bb0(%device: !hal.device, %workload_x: index, %workload_y: index):
       %count_x = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%workload_x]

diff --git a/samples/custom_dispatch/cpu/embedded/example_hal.mlir b/samples/custom_dispatch/cpu/embedded/example_hal.mlir
index f2788c2..7bddf07 100644
--- a/samples/custom_dispatch/cpu/embedded/example_hal.mlir
+++ b/samples/custom_dispatch/cpu/embedded/example_hal.mlir

@@ -58,7 +58,7 @@
   hal.executable private @executable {
 
     // Variant linking in an x86-64 object file containing external functions.
-    hal.executable.variant public @x86_64, target = #x86_64_target, objects = [
+    hal.executable.variant public @x86_64 target(#x86_64_target) objects([
       // Object files linked into the executable.
       // These object files are linked into the dynamic library and must meet
       // the requirements for embedded ELF linkage (no TLS, no globals, no
@@ -73,7 +73,7 @@
         // part of the overall compilation.
         path = "samples/custom_dispatch/cpu/embedded/functions_x86_64.o"
       }>
-    ] {
+    ]) {
 
       // TODO(benvanik): demonstrate hal.executable.constant.block for
       // specialization via host logic and hal.executable.constant.load for
commit	20e2112cf149bb342e1d9bfe8f01229a674de1ab	[log] [tgz]
author	Ben Vanik <ben.vanik@gmail.com>	Fri Oct 20 11:44:37 2023 -0700
committer	GitHub <noreply@github.com>	Fri Oct 20 11:44:37 2023 -0700
tree	589d1e7eb03c0c819068007a7b3e0f6621e2ccdc
parent	a95a28a7f56472b3098edfd31d0adeedc64cb180 [diff]