[spirv] Port more tests over to dynamic pipelines (#6749)

Now we need to attach the lowering configurations (`lowering.config` on Linalg ops and `translation.info`/`workgroup_size` on entry points) onto the ops themselves.
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
index 186a6c3..05cc9fd 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorize.cpp
@@ -544,6 +544,10 @@
     RewritePatternSet canoncalizationPatterns(context);
     populateAffineMinSCFCanonicalizationPattern(canoncalizationPatterns);
     SmallVector<int64_t> workgroupSize = getWorkgroupSize(entryPointOp);
+    if (workgroupSize.empty()) {
+      entryPointOp.emitError("expected to have workgroup_size attribute");
+      return signalPassFailure();
+    }
     auto getThreadRangeFn = [workgroupSize](Value processorValue,
                                             SmallVectorImpl<Value> &dims,
                                             SmallVectorImpl<Value> &symbols) {
diff --git a/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir b/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
index a7d78af..5c73cec 100644
--- a/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
@@ -1,133 +1,187 @@
-// RUN: iree-opt -split-input-file -iree-convert-to-spirv %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv)))' %s | IreeFileCheck %s
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
-  // CHECK-LABEL: spv.module
-  // CHECK: spv.GlobalVariable @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
-  // CHECK: spv.func @push_constant()
-  func @push_constant() {
-    // CHECK: %[[INDEX_0:.+]] = spv.Constant 0 : i32
-    // CHECK: %[[INDEX_1:.+]] = spv.Constant 2 : i32
-    // CHECK: %[[ADDR:.+]] = spv.mlir.addressof @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
-    // CHECK: %[[AC:.+]] = spv.AccessChain %[[ADDR]][%[[INDEX_0]], %[[INDEX_1]]] : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
-    // CHECK: spv.Load "PushConstant" %[[AC]] : i32
-    %0 = hal.interface.load.constant offset = 2 : index
-    return
-  }
-
+hal.executable @push_constant attributes {sym_visibility = "private"} {
   hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @push_constant attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+      // CHECK-LABEL: spv.module
+      // CHECK: spv.GlobalVariable @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+      // CHECK: spv.func @push_constant()
+      func @push_constant() {
+        // CHECK: %[[INDEX_0:.+]] = spv.Constant 0 : i32
+        // CHECK: %[[INDEX_1:.+]] = spv.Constant 2 : i32
+        // CHECK: %[[ADDR:.+]] = spv.mlir.addressof @__push_constant_var__ : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+        // CHECK: %[[AC:.+]] = spv.AccessChain %[[ADDR]][%[[INDEX_0]], %[[INDEX_1]]] : !spv.ptr<!spv.struct<(!spv.array<5 x i32, stride=4> [0])>, PushConstant>
+        // CHECK: spv.Load "PushConstant" %[[AC]] : i32
+        %0 = hal.interface.load.constant offset = 2 : index
+        return
+      }
+
+      hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write"
+      }
+    }
+  }
 }
 
 // -----
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
-  // CHECK-LABEL: spv.module
-  // CHECK: spv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-  // CHECK: spv.func @resource_bindings_in_same_entry_func()
-  func @resource_bindings_in_same_entry_func() {
-    %c0 = constant 0 : index
-
-    // Same type
-    // CHECK: spv.mlir.addressof @[[ARG0]]
-    // CHECK: spv.mlir.addressof @[[ARG0]]
-    %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
-    %1 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
-
-    // Different type
-    // CHECK: spv.mlir.addressof @[[ARG1_0]]
-    // CHECK: spv.mlir.addressof @[[ARG1_1]]
-    %2 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4x4xf32>
-    %3 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4xvector<4xf32>>
-
-    // CHECK: spv.mlir.addressof @[[RET0]]
-    %4 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32>
-
-    %5 = memref.load %0[%c0, %c0] : memref<4x4xf32>
-    %6 = memref.load %1[%c0, %c0] : memref<4x4xf32>
-
-    %7 = memref.load %2[%c0, %c0] : memref<4x4xf32>
-    %8 = memref.load %3[%c0] : memref<4xvector<4xf32>>
-
-    %9 = memref.load %4[%c0, %c0] : memref<4x4xf32>
-
-    return
-  }
-
+hal.executable @resource_bindings_in_same_func attributes {sym_visibility = "private"} {
   hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
     hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
     hal.interface.binding @arg1, set=1, binding=3, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @resource_bindings_in_same_func attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+      // CHECK-LABEL: spv.module
+      // CHECK: spv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+      // CHECK: spv.func @resource_bindings_in_same_entry_func()
+      func @resource_bindings_in_same_entry_func() {
+        %c0 = constant 0 : index
+
+        // Same type
+        // CHECK: spv.mlir.addressof @[[ARG0]]
+        // CHECK: spv.mlir.addressof @[[ARG0]]
+        %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+        %1 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+
+        // Different type
+        // CHECK: spv.mlir.addressof @[[ARG1_0]]
+        // CHECK: spv.mlir.addressof @[[ARG1_1]]
+        %2 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4x4xf32>
+        %3 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<4xvector<4xf32>>
+
+        // CHECK: spv.mlir.addressof @[[RET0]]
+        %4 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32>
+
+        %5 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+        %6 = memref.load %1[%c0, %c0] : memref<4x4xf32>
+
+        %7 = memref.load %2[%c0, %c0] : memref<4x4xf32>
+        %8 = memref.load %3[%c0] : memref<4xvector<4xf32>>
+
+        %9 = memref.load %4[%c0, %c0] : memref<4x4xf32>
+
+        return
+      }
+
+      hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
+        hal.interface.binding @arg1, set=1, binding=3, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
+      }
+    }
+  }
 }
 
 // -----
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
-  // CHECK-LABEL: spv.module
-  // CHECK: spv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-  // CHECK: spv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
-
-  // CHECK: spv.func @resource_bindings_in_entry_func1()
-  func @resource_bindings_in_entry_func1() {
-    // CHECK: spv.mlir.addressof @[[FUNC1_ARG]]
-    // CHECK: spv.mlir.addressof @[[FUNC1_RET]]
-    %c0 = constant 0 : index
-    %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
-    %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4xvector<4xf32>>
-
-    %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
-    %3 = memref.load %1[%c0] : memref<4xvector<4xf32>>
-
-    return
-  }
-
-  // CHECK: spv.func @resource_bindings_in_entry_func2()
-  func @resource_bindings_in_entry_func2() {
-    // CHECK: spv.mlir.addressof @[[FUNC2_ARG]]
-    // CHECK: spv.mlir.addressof @[[FUNC2_RET]]
-    %c0 = constant 0 : index
-    %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32> // Same type as previous function
-    %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32> // Different type as previous function
-
-    %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
-    %3 = memref.load %1[%c0, %c0] : memref<4x4xf32>
-
-    return
-  }
-
+hal.executable @resource_bindings_in_multi_entry_func attributes {sym_visibility = "private"} {
   hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
     hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @resource_bindings_in_entry_func1 attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    hal.executable.entry_point @resource_bindings_in_entry_func2 attributes {
+      interface = @io, ordinal = 1 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, {}>} {
+      // CHECK-LABEL: spv.module
+      // CHECK: spv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+      // CHECK: spv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spv.ptr<!spv.struct<(!spv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+
+      // CHECK: spv.func @resource_bindings_in_entry_func1()
+      func @resource_bindings_in_entry_func1() {
+        // CHECK: spv.mlir.addressof @[[FUNC1_ARG]]
+        // CHECK: spv.mlir.addressof @[[FUNC1_RET]]
+        %c0 = constant 0 : index
+        %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32>
+        %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4xvector<4xf32>>
+
+        %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+        %3 = memref.load %1[%c0] : memref<4xvector<4xf32>>
+
+        return
+      }
+
+      // CHECK: spv.func @resource_bindings_in_entry_func2()
+      func @resource_bindings_in_entry_func2() {
+        // CHECK: spv.mlir.addressof @[[FUNC2_ARG]]
+        // CHECK: spv.mlir.addressof @[[FUNC2_RET]]
+        %c0 = constant 0 : index
+        %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<4x4xf32> // Same type as previous function
+        %1 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<4x4xf32> // Different type as previous function
+
+        %2 = memref.load %0[%c0, %c0] : memref<4x4xf32>
+        %3 = memref.load %1[%c0, %c0] : memref<4x4xf32>
+
+        return
+      }
+
+      hal.interface @io attributes {push_constants = 5 : index, sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=1, binding=2, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=3, binding=4, type="StorageBuffer", access="Write"
+      }
+    }
+  }
 }
 
 // -----
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
-  func @interface_binding() {
-    %c0 = constant 0 : index
-    %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<8x5xf32>
-    %1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<5xf32>
-    %2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<8x5xf32>
-
-    %3 = memref.load %0[%c0, %c0] : memref<8x5xf32>
-    %4 = memref.load %1[%c0] : memref<5xf32>
-    %5 = memref.load %2[%c0, %c0] : memref<8x5xf32>
-
-    return
-  }
+hal.executable @interface_binding attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
     hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @interface_binding attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
+      func @interface_binding() {
+        %c0 = constant 0 : index
+        %0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<8x5xf32>
+        %1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<5xf32>
+        %2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<8x5xf32>
+
+        %3 = memref.load %0[%c0, %c0] : memref<8x5xf32>
+        %4 = memref.load %1[%c0] : memref<5xf32>
+        %5 = memref.load %2[%c0, %c0] : memref<8x5xf32>
+
+        return
+      }
+      hal.interface @io attributes {sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+      }
+    }
+  }
 }
 
 // Explicitly check the variable symbols
@@ -143,18 +197,32 @@
 
 // -----
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
-  func @interface_wg_id() {
-    %0 = hal.interface.workgroup.id[0] : index
-    %1 = hal.interface.workgroup.id[1] : index
-    return
-  }
+hal.executable @interface_wg_id attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
     hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @interface_wg_id attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
+      func @interface_wg_id() {
+        %0 = hal.interface.workgroup.id[0] : index
+        %1 = hal.interface.workgroup.id[1] : index
+        return
+      }
+      hal.interface @io attributes {sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+      }
+    }
+  }
 }
+
 // CHECK-LABEL: spv.module
 //   CHECK-DAG:   spv.GlobalVariable @[[WGID:.+]] built_in("WorkgroupId")
 //       CHECK:   spv.func
@@ -167,17 +235,30 @@
 
 // -----
 
-module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
-  func @interface_wg_count() {
-    %0 = hal.interface.workgroup.count[0] : index
-    %1 = hal.interface.workgroup.count[1] : index
-    return
-  }
+hal.executable @interface_wg_count attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
     hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
+  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
+    hal.executable.entry_point @interface_wg_count attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 1: index, 1: index]
+    }
+    module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], []>, SwiftShader:CPU, {}>}  {
+      func @interface_wg_count() {
+        %0 = hal.interface.workgroup.count[0] : index
+        %1 = hal.interface.workgroup.count[1] : index
+        return
+      }
+      hal.interface @io attributes {sym_visibility = "private"} {
+        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
+        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
+      }
+    }
+  }
 }
 // CHECK-LABEL: spv.module
 //   CHECK-DAG:   spv.GlobalVariable @[[WGCOUNT:.+]] built_in("NumWorkgroups")
diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index 936962e..1b4bfe3 100644
--- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-hlo-to-spirv-pipeline))' -iree-spirv-workgroup-tile-size=8,64,4 -iree-spirv-invocation-tile-size=8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-linalg-to-spirv-pipeline))' %s | IreeFileCheck %s
+
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
 
 hal.executable @fuse_and_vectorize_fill_matmul attributes {sym_visibility = "private"} {
   hal.interface @io {
@@ -8,8 +10,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @fuse_and_vectorize_fill_matmul attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @fuse_and_vectorize_fill_matmul() {
@@ -40,8 +43,8 @@
             %13 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg0)[%workgroup_size_y]
             %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg1)[%workgroup_size_x]
             %15 = linalg.init_tensor [%13, %14] : tensor<?x?xf32>
-            %16 = linalg.fill(%cst, %15) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
-            %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %10 : tensor<?x4096xf32>, tensor<4096x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
+            %16 = linalg.fill(%cst, %15) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+            %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %10 : tensor<?x4096xf32>, tensor<4096x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
             flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:4096x4096xf32>
           }
         }
@@ -66,6 +69,8 @@
 
 // -----
 
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
+
 hal.executable @fuse_and_vectorize_matmul_add attributes {sym_visibility = "private"} {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -74,8 +79,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @fuse_and_vectorize_matmul_add attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @fuse_and_vectorize_matmul_add() {
@@ -112,9 +118,9 @@
             %18 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg0)[%workgroup_size_y]
             %19 = affine.min affine_map<(d0)[s0] -> (-d0 + 256, s0)>(%arg1)[%workgroup_size_x]
             %20 = linalg.init_tensor [%18, %19] : tensor<?x?xf32>
-            %21 = linalg.fill(%cst, %20) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
-            %22 = linalg.matmul ins(%15, %17 : tensor<?x512xf32>, tensor<512x?xf32>) outs(%21 : tensor<?x?xf32>) -> tensor<?x?xf32>
-            %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%13 : tensor<?x?xf32>) attrs =  {__internal_linalg_transform__ = "workgroup"} {
+            %21 = linalg.fill(%cst, %20) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+            %22 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%15, %17 : tensor<?x512xf32>, tensor<512x?xf32>) outs(%21 : tensor<?x?xf32>) -> tensor<?x?xf32>
+            %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%13 : tensor<?x?xf32>) attrs =  {__internal_linalg_transform__ = "workgroup", lowering.config = #config} {
             ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):  // no predecessors
               %24 = addf %arg2, %arg3 : f32
               linalg.yield %24 : f32
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
index 52a6cac..1ce42bd 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-tile-and-vectorize,canonicalize,cse))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-vectorize,canonicalize,cse))))' %s | IreeFileCheck %s
 
 #map0 = affine_map<()[s0] -> (s0 * 8)>
 #map1 = affine_map<()[s0, s1] -> (8, s1 - s0 * 8)>
@@ -8,6 +8,8 @@
 #map5 = affine_map<(d0, d1, d2) -> (d2, d1)>
 #map6 = affine_map<(d0, d1, d2) -> (d0, d1)>
 
+#config = {tileSizes = [[8, 16, 0], [], [1, 1, 1]]}
+
 hal.executable @matmul attributes {sym_visibility = "private"} {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -15,7 +17,11 @@
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @matmul attributes {interface = @io, ordinal = 0 : index}
+    hal.executable.entry_point @matmul attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 8: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [8, 16]}
+    }
     module attributes {
       spv.target_env =
         #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
@@ -48,7 +54,7 @@
           %16 = memref.dim %arg2, %c1 : memref<?x?xf32>
           %17 = affine.min #map1()[%1, %16]
           %18 = memref.subview %arg2[%3, %10] [%15, %17] [1, 1]  : memref<?x?xf32> to memref<?x?xf32, #map3>
-          linalg.matmul {__internal_linalg_transform__ = "workgroup"}
+          linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config}
             ins(%7, %13 : memref<?x?xf32, #map3>, memref<?x?xf32, #map3>)
            outs(%18 : memref<?x?xf32, #map3>)
         }
@@ -77,6 +83,8 @@
 
 // -----
 
+#config = {tileSizes = [[1, 4, 32], [], [1, 1, 1]]}
+
 hal.executable @conv_1d attributes {sym_visibility = "private"} {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -84,7 +92,11 @@
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @conv_1d attributes {interface = @io, ordinal = 0 : index}
+    hal.executable.entry_point @conv_1d attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 4: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+    }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}  {
       func @conv_1d() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
         %cst = constant 0.000000e+00 : f32
@@ -107,7 +119,7 @@
         %15 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 1)>()[%3]
         %16 = memref.subview %0[%5, %12, %14] [1, %13, %15] [1, 1, 1] : memref<3x6x1xf32> to memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>
         %17 = memref.subview %0[%5, %12, %9] [1, %13, %10] [1, 1, 1] : memref<3x6x1xf32> to memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>
-        linalg.conv_1d_input_nwc_filter_wcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%8, %11 : memref<1x?x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 8 + s0 + d1 + d2)>>, memref<3x1x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>) outs(%16 : memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>)
+        linalg.conv_1d_input_nwc_filter_wcf { __internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%8, %11 : memref<1x?x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 8 + s0 + d1 + d2)>>, memref<3x1x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0 + d1 + d2)>>) outs(%16 : memref<1x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 6 + s0 + d1 + d2)>>)
         return
       }
       hal.interface @io attributes {sym_visibility = "private"} {
@@ -157,6 +169,8 @@
 #map6 = affine_map<(d0)[s0] -> (4, -d0 + s0)>
 #map7 = affine_map<(d0)[s0] -> (32, -d0 + s0)>
 
+#config = {tileSizes = [[0, 1, 4, 32], [], [0, 1, 1, 1]]}
+
 hal.executable @conv_no_padding attributes {sym_visibility = "private"} {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -164,7 +178,11 @@
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @conv_no_padding attributes {interface = @io, ordinal = 0 : index}
+    hal.executable.entry_point @conv_no_padding attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 4: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+    }
     module attributes {
       spv.target_env =
         #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
@@ -213,6 +231,7 @@
                       : memref<?x?x?x?xf32> to memref<?x?x?x?xf32, #map5>
               linalg.conv_2d_input_nhwc_filter_hwcf {
                 __internal_linalg_transform__ = "workgroup",
+                lowering.config = #config,
                 dilations = dense<1> : tensor<2xi64>,
                 strides = dense<2> : tensor<2xi64>}
                  ins(%21, %arg0 : memref<?x?x?x?xf32, #map5>, memref<?x?x?x?xf32>)
@@ -270,6 +289,8 @@
 
 // -----
 
+#config = {tileSizes = [[0, 0, 1, 4, 32], [], [0, 0, 1, 1, 1]]}
+
 hal.executable @conv_3d attributes {sym_visibility = "private"} {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -277,7 +298,11 @@
     hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @conv_3d attributes {interface = @io, ordinal = 0 : index}
+    hal.executable.entry_point @conv_3d attributes {
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [32: index, 4: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+    }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative], [SPV_KHR_storage_buffer_storage_class]>, SwiftShader:CPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 16384 : i32, max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>, subgroup_size = 4 : i32}>}  {
       func @conv_3d() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
         %cst = constant 0.000000e+00 : f32
@@ -299,7 +324,7 @@
         %14 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 7)>()[%3]
         %15 = memref.subview %0[%5, %11, %13, 0, 0] [1, %12, %14, 7, 2] [1, 1, 1, 1, 1] : memref<2x7x7x7x2xf32> to memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>
         %16 = memref.subview %0[%5, %11, %13, 0, 0] [1, %12, %14, 7, 2] [1, 1, 1, 1, 1] : memref<2x7x7x7x2xf32> to memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>
-        linalg.conv_3d_input_ndhwc_filter_dhwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins(%10, %2 : memref<1x?x?x8x3xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 1536 + s0 + d1 * 192 + d2 * 24 + d3 * 3 + d4)>>, memref<2x2x2x3x2xf32>) outs(%15 : memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>)
+        linalg.conv_3d_input_ndhwc_filter_dhwcf {__internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins(%10, %2 : memref<1x?x?x8x3xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 1536 + s0 + d1 * 192 + d2 * 24 + d3 * 3 + d4)>>, memref<2x2x2x3x2xf32>) outs(%15 : memref<1x?x?x7x2xf32, affine_map<(d0, d1, d2, d3, d4)[s0] -> (d0 * 686 + s0 + d1 * 98 + d2 * 14 + d3 * 2 + d4)>>)
         return
       }
       hal.interface @io attributes {sym_visibility = "private"} {
@@ -334,6 +359,9 @@
 #map5 = affine_map<()[s0] -> (4, s0 * -4 + 14)>
 #map6 = affine_map<()[s0] -> (32, s0 * -32 + 13)>
 #map7 = affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 1092 + s0 + d1 * 78 + d2 * 6 + d3)>
+
+#config = {tileSizes = [[1, 4, 32], [], [1, 1, 1]]}
+
 module  {
   hal.executable @pooling_nhwc_max attributes {sym_visibility = "private"} {
     hal.interface @io {
@@ -342,11 +370,10 @@
       hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
     }
     hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-      hal.executable.entry_point @pooling_nhwc_max attributes {interface = @io, ordinal = 0 : index} {
-      ^bb0(%arg0: index, %arg1: index, %arg2: index):  // no predecessors
-        %c4 = constant 4 : index
-        %c1 = constant 1 : index
-        hal.return %c1, %c4, %c1 : index, index, index
+      hal.executable.entry_point @pooling_nhwc_max attributes {
+        interface = @io, ordinal = 0 : index,
+        workgroup_size = [32: index, 4: index, 1: index],
+        translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
       }
       module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>}  {
         func @pooling_nhwc_max() attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
@@ -364,7 +391,7 @@
           %10 = affine.min #map5()[%4]
           %11 = affine.min #map6()[%3]
           %12 = memref.subview %2[0, %5, %7, 0] [2, %10, %11, 6] [1, 1, 1, 1] : memref<2x14x13x6xf32> to memref<2x?x?x6xf32, #map7>
-          linalg.pooling_nhwc_max {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%9, %1 : memref<2x?x?x6xf32, #map4>, memref<3x4xf32>) outs(%12 : memref<2x?x?x6xf32, #map7>)
+          linalg.pooling_nhwc_max {__internal_linalg_transform__ = "workgroup", lowering.config = #config, dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%9, %1 : memref<2x?x?x6xf32, #map4>, memref<3x4xf32>) outs(%12 : memref<2x?x?x6xf32, #map7>)
           return
         }
         hal.interface @io attributes {sym_visibility = "private"} {
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index 7378c56..1cbb881 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-concretize-workgroup-tiles,iree-spirv-tile-and-vectorize))' -canonicalize -cse -iree-spirv-workgroup-tile-size=1,8,64,4 -iree-spirv-invocation-tile-size=1,8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
+
+#config = {tileSizes = [[1, 8, 64, 4], [], [1, 8, 4, 4]]}
 
 hal.executable @batch_matmul_static_shape attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
@@ -8,8 +10,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @batch_matmul_static_shape attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8, 1]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @batch_matmul_static_shape() {
@@ -43,7 +46,9 @@
               %12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1024)>(%arg2)[%workgroup_size_x]
               %13 = memref.subview %1[%arg0, 0, %arg2] [%9, 1024, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
               %14 = memref.subview %2[%arg0, %arg1, %arg2] [%9, %10, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
-              linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+              linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config}
+                ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+                outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
             }
           }
         }
@@ -74,21 +79,29 @@
 //  CHECK-DAG:  %[[C6:.+]] = constant 6 : index
 //  CHECK-DAG:  %[[C7:.+]] = constant 7 : index
 //      CHECK:  %[[BIDX:.+]] = hal.interface.workgroup.id[0]
+//      CHECK:  %[[BCNTX:.+]] = hal.interface.workgroup.count[0]
 //      CHECK:  %[[BIDY:.+]] = hal.interface.workgroup.id[1]
+//      CHECK:  %[[BCNTY:.+]] = hal.interface.workgroup.count[1]
 //      CHECK:  %[[BIDZ:.+]] = hal.interface.workgroup.id[2]
-//  CHECK-DAG:  %[[BOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-//  CHECK-DAG:  %[[BOFFSET_X:.+]] = affine.apply #[[MAP1]]()[%[[BIDX]]]
+//      CHECK:  %[[BCNTZ:.+]] = hal.interface.workgroup.count[2]
+//      CHECK:  scf.for %[[IVZ:.+]] = %[[BIDZ]] to %{{.+}} step %[[BCNTZ]]
+//      CHECK:  %[[BOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
+//      CHECK:  %[[UBY:.+]] = affine.apply #[[MAP0]]()[%[[BCNTY]]]
+//      CHECK:  scf.for %[[IVY:.+]] = %[[BOFFSET_Y]] to %{{.+}} step %[[UBY]]
+//      CHECK:  %[[BOFFSET_X:.+]] = affine.apply #[[MAP1]]()[%[[BIDX]]]
+//      CHECK:  %[[UBX:.+]] = affine.apply #[[MAP1]]()[%[[BCNTX]]]
 //      CHECK:  %[[SUBVIEW_ARG0:.+]] = memref.subview %[[ARG0]]
-// CHECK-SAME:      [%[[BIDZ]], %[[BOFFSET_Y]], 0] [1, 8, 1024]
-//      CHECK:  %[[SUBVIEW_ARG1:.+]] = memref.subview %[[ARG1]]
-// CHECK-SAME:      [%[[BIDZ]], 0, %[[BOFFSET_X]]] [1, 1024, 64]
-//      CHECK:  %[[SUBVIEW_RESULT:.+]] = memref.subview %[[RET0]]
-// CHECK-SAME:      [%[[BIDZ]], %[[BOFFSET_Y]], %[[BOFFSET_X]]] [1, 8, 64]
+// CHECK-SAME:      [%[[IVZ]], %[[IVY]], 0] [1, 8, 1024]
 //      CHECK:  %[[IIDX:.+]] = "gpu.thread_id"() {dimension = "x"}
 //      CHECK:  %[[IIDY:.+]] = "gpu.thread_id"() {dimension = "y"}
 //      CHECK:  %[[IIDZ:.+]] = "gpu.thread_id"() {dimension = "z"}
 //  CHECK-DAG:  %[[IOFFSET_Y:.+]] = affine.apply #[[MAP0]]()[%[[IIDY]]]
 //  CHECK-DAG:  %[[IOFFSET_X:.+]] = affine.apply #[[MAP2]]()[%[[IIDX]]]
+//      CHECK:  scf.for %[[IVX:.+]] = %[[BOFFSET_X]] to %{{.+}} step %[[UBX]]
+//      CHECK:  %[[SUBVIEW_ARG1:.+]] = memref.subview %[[ARG1]]
+// CHECK-SAME:      [%[[IVZ]], 0, %[[IVX]]] [1, 1024, 64]
+//      CHECK:  %[[SUBVIEW_RESULT:.+]] = memref.subview %[[RET0]]
+// CHECK-SAME:      [%[[IVZ]], %[[IVY]], %[[IVX]]] [1, 8, 64]
 //      CHECK:  %[[SUBVIEW_RESULT_2:.+]] = memref.subview %[[SUBVIEW_RESULT]]
 // CHECK-SAME:      [%[[IIDZ]], %[[IOFFSET_Y]], %[[IOFFSET_X]]] [1, 8, 4]
 //  CHECK-DAG:  %[[READ_INIT_0:.+]] = vector.transfer_read
@@ -108,7 +121,7 @@
 //  CHECK-DAG:  %[[READ_INIT_7:.+]] = vector.transfer_read
 // CHECK-SAME:    %[[SUBVIEW_RESULT_2]][%[[C0]], %[[C7]],  %[[C0]]]
 
-//      CHECK:  %[[FOR_RES:.+]]:8 = scf.for %[[IV0:.+]] = {{.*}} to
+//      CHECK:  %[[FOR_RES:.+]]:8 = scf.for %[[IV3:.+]] = {{.*}} to
 // CHECK-SAME:  iter_args(%[[ACC_0:.+]] = %[[READ_INIT_0]],
 // CHECK-SAME:  %[[ACC_1:.+]] = %[[READ_INIT_1]],
 // CHECK-SAME:  %[[ACC_2:.+]] = %[[READ_INIT_2]],
@@ -118,9 +131,9 @@
 // CHECK-SAME:  %[[ACC_6:.+]] = %[[READ_INIT_6]],
 // CHECK-SAME:  %[[ACC_7:.+]] = %[[READ_INIT_7]])
 //  CHECK-DAG:    %[[SUBVIEW_LHS:.+]] = memref.subview %[[SUBVIEW_ARG0]]
-// CHECK-SAME:      [%[[IIDZ]], %[[IOFFSET_Y]], %[[IV0]]] [1, 8, 4]
+// CHECK-SAME:      [%[[IIDZ]], %[[IOFFSET_Y]], %[[IV3]]] [1, 8, 4]
 //  CHECK-DAG:    %[[SUBVIEW_RHS:.+]] = memref.subview %[[SUBVIEW_ARG1]]
-// CHECK-SAME:      [%[[IIDZ]], %[[IV0]], %[[IOFFSET_X]]] [1, 4, 4] [1, 1, 1]
+// CHECK-SAME:      [%[[IIDZ]], %[[IV3]], %[[IOFFSET_X]]] [1, 4, 4] [1, 1, 1]
 
 //  CHECK-DAG:    %[[READ_LHS_0:.+]] = vector.transfer_read %[[SUBVIEW_LHS]][%[[C0]], %[[C0]], %[[C0]]]
 //  CHECK-DAG:    %[[READ_LHS_1:.+]] = vector.transfer_read %[[SUBVIEW_LHS]][%[[C0]], %[[C1]], %[[C0]]]
@@ -355,6 +368,8 @@
 
 // -----
 
+#config = {tileSizes = [[1, 8, 64, 4], [], [1, 8, 4, 4]]}
+
 hal.executable @fused_fill_batch_matmul attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -363,8 +378,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @fused_fill_batch_matmul attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8, 1]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @fused_fill_batch_matmul() {
@@ -399,8 +415,8 @@
               %12 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1024)>(%arg2)[%workgroup_size_x]
               %13 = memref.subview %1[%arg0, 0, %arg2] [%9, 1024, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
               %14 = memref.subview %2[%arg0, %arg1, %arg2] [%9, %10, %12] [1, 1, 1] : memref<4x1024x1024xf32> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
-              linalg.fill(%zero, %14) : f32, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
-              linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
+              linalg.fill(%zero, %14) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>
+              linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%11, %13 : memref<?x?x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>, memref<?x1024x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>) outs(%14 : memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 1048576 + s0 + d1 * 1024 + d2)>>)
             }
           }
         }
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index 01f71a3..e3c3076 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
@@ -1,4 +1,6 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-concretize-workgroup-tiles,iree-spirv-tile-and-vectorize))' -canonicalize -cse -iree-spirv-workgroup-tile-size=8,64,4 -iree-spirv-invocation-tile-size=8,4,4 -iree-spirv-workgroup-size=16,1,1 %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
+
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
 
 hal.executable @matmul_static_shape_f16 attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
@@ -8,8 +10,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @matmul_static_shape_f16 attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @matmul_static_shape_f16() {
@@ -36,8 +39,8 @@
             %9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 4096)>(%arg1)[%workgroup_size_x]
             %10 = memref.subview %2[%arg0, %arg1] [%7, %9] [1, 1] : memref<4096x4096xf16> to memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
             %11 = memref.subview %1[0, %arg1] [4096, %9] [1, 1] : memref<4096x4096xf16> to memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
-            linalg.fill(%cst, %10) {__internal_linalg_transform__ = "workgroup"} : f16, memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
-            linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %11 : memref<?x4096xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%10 : memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
+            linalg.fill(%cst, %10) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f16, memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
+            linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %11 : memref<?x4096xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%10 : memref<?x?xf16, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
           }
         }
         return
@@ -63,6 +66,8 @@
 
 // -----
 
+#config = {tileSizes = [[8, 64, 4], [], [8, 4, 4]]}
+
 hal.executable @matmul_static_shape_f32 attributes {sym_visibility = "private"} {
   hal.interface @io attributes {sym_visibility = "private"} {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -71,8 +76,9 @@
   }
   hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @matmul_static_shape_f32 attributes {
-      interface = @io,
-      ordinal = 0 : index
+      interface = @io, ordinal = 0 : index,
+      workgroup_size = [16: index, 1: index, 1: index],
+      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [64, 8]}
     }
     module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>, ARM:IntegratedGPU, {}>}  {
       func @matmul_static_shape_f32() {
@@ -99,8 +105,8 @@
             %9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 4096)>(%arg1)[%workgroup_size_x]
             %10 = memref.subview %1[0, %arg1] [4096, %9] [1, 1] : memref<4096x4096xf32> to memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
             %11 = memref.subview %2[%arg0, %arg1] [%7, %9] [1, 1] : memref<4096x4096xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
-            linalg.fill(%cst, %11) : f32, memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
-            linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%8, %10 : memref<?x4096xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%11 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
+            linalg.fill(%cst, %11) {__internal_linalg_transform__ = "workgroup", lowering.config = #config}: f32, memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>
+            linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%8, %10 : memref<?x4096xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>, memref<4096x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>) outs(%11 : memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4096 + s0 + d1)>>)
           }
         }
         return
diff --git a/iree/compiler/Codegen/Utils/Utils.cpp b/iree/compiler/Codegen/Utils/Utils.cpp
index c062645..912e61e 100644
--- a/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/iree/compiler/Codegen/Utils/Utils.cpp
@@ -29,8 +29,7 @@
 }
 
 IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp) {
-  auto variantOp =
-      funcOp.getOperation()->getParentOfType<IREE::HAL::ExecutableVariantOp>();
+  auto variantOp = funcOp->getParentOfType<IREE::HAL::ExecutableVariantOp>();
   for (auto op : variantOp.getOps<IREE::HAL::ExecutableEntryPointOp>()) {
     if (op.sym_name() == funcOp.getName()) {
       return op;
@@ -41,8 +40,7 @@
 
 llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> getAllEntryPoints(
     ModuleOp module) {
-  auto variantOp =
-      module.getOperation()->getParentOfType<IREE::HAL::ExecutableVariantOp>();
+  auto variantOp = module->getParentOfType<IREE::HAL::ExecutableVariantOp>();
   llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPointOps;
   for (auto op : variantOp.getOps<IREE::HAL::ExecutableEntryPointOp>()) {
     entryPointOps[op.sym_name()] = op;