[Codegen] Test Cleanup 4/8: Dialect tests (#22747)

Result of a scan over all tests in Codegen to clean up common issues in
tests. A summary of the results, plus a preamble describing the issues
that were looked for, can be found here:

https://gist.github.com/qedawkins/40f9e604fd83745bf1ac20fd63a7a61f
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/roundtrip.mlir b/compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/roundtrip.mlir
index 3a9c37e..27fb3b4 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/roundtrip.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/roundtrip.mlir
@@ -48,13 +48,13 @@
 }
 // Order matters because it is sorted.
 // CHECK:       #[[$CONFIG:.+]] = #iree_cpu.lowering_config<
-// CHECK-SAME{LITERAL}:      cache_parallel = [64, 64, 0]
-// CHECK-SAME{LITERAL}:      cache_reduction = [0, 0, 16]
-// CHECK-SAME{LITERAL}:      distribution = [128, 128, 0]
+// CHECK-SAME:      cache_parallel = [64, 64, 0]
+// CHECK-SAME:      cache_reduction = [0, 0, 16]
+// CHECK-SAME:      distribution = [128, 128, 0]
 // CHECK-SAME{LITERAL}:      vector_common_parallel = [[4], [4], 0]
-// CHECK-SAME{LITERAL}:      vector_inner_parallel = [0, 0, 0]
+// CHECK-SAME:      vector_inner_parallel = [0, 0, 0]
 // CHECK-SAME{LITERAL}:      vector_reduction = [0, 0, [4]]
-// CHECK-LABEL:         @test_full_lowering_config_with_scalable_vector()
+// CHECK-LABEL: @test_full_lowering_config_with_scalable_vector()
 // CHECK-SAME:    lowering_config = #[[$CONFIG]]
 
 // -----
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/lowering_config_attr.mlir b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/lowering_config_attr.mlir
index 7f93802..439bbf2 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/lowering_config_attr.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/lowering_config_attr.mlir
@@ -66,7 +66,7 @@
     return
   }
 }
-// CHECK: #iree_codegen.export_config<workgroup_size = [4, 1]
+// CHECK: #iree_codegen.export_config<workgroup_size = [4, 1]>
 
 // -----
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/ukernel_ops.mlir b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/ukernel_ops.mlir
index 5015624..b70c873 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/ukernel_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/test/ukernel_ops.mlir
@@ -63,7 +63,7 @@
   return %0#0, %0#1 : tensor<?xf32>, tensor<?x?xf32>
 }
 //      CHECK: func @ukernel_generic_optional_input(
-//      CHECK:   %[[RESULT:.+]]:2 = iree_codegen.ukernel.generic
+//      CHECK:   %{{.+}}:2 = iree_codegen.ukernel.generic
 //  CHECK-NOT:       ins
 
 // -----
@@ -92,7 +92,7 @@
   return %0#0, %0#1 : tensor<?xf32>, tensor<?x?xf32>
 }
 //      CHECK: func @ukernel_generic_optional_other_operands(
-//      CHECK:   %[[RESULT:.+]]:2 = iree_codegen.ukernel.generic
+//      CHECK:   %{{.+}}:2 = iree_codegen.ukernel.generic
 // CHECK-SAME:       outs(%{{.+}}, %{{.+}} : tensor<?xf32>, tensor<?x?xf32>) ->
 
 // -----
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/test/iree_gpu_attrs.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/test/iree_gpu_attrs.mlir
index 2c6ac10..bf33e78 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/test/iree_gpu_attrs.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/test/iree_gpu_attrs.mlir
@@ -114,7 +114,7 @@
     return
   }
 }
-// CHECK-LABEL: func @test_data_tiled_mfma_f32_16x16x4_f32
+// CHECK-LABEL: func @test_data_tiled_mfma_f32_16x16x4_f32_subgroups_k
 //  CHECK-SAME:   mma_types = #iree_gpu.data_tiled_mma_layout<intrinsic = MFMA_F32_16x16x4_F32, intrinsics_m = 4, subgroups_k = 2, operands_interleaving_intrinsics_k = [0, 1]>
 
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/convert_to_multi_mma.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/convert_to_multi_mma.mlir
index 3b9f856..64902fa 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/convert_to_multi_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/convert_to_multi_mma.mlir
@@ -105,7 +105,7 @@
 // CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0)>
 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
 
-// CHECK-LABEL: func @convert_to_mfma_16x16x16
+// CHECK-LABEL: func @convert_to_mfma_16x16x16_transpose_b
 //  CHECK-SAME:   %[[LHS:[A-Za-z0-9]+]]: tensor<2x16x16xf16>
 //  CHECK-SAME:   %[[RHS:[A-Za-z0-9]+]]: tensor<2x16x16xf16>
 //  CHECK-SAME:   %[[ACC:[A-Za-z0-9]+]]: tensor<16x16xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_inner_tiled.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_inner_tiled.mlir
index 73ecea0..8d1224d 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_inner_tiled.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_inner_tiled.mlir
@@ -80,9 +80,6 @@
     transform.yield
   }
 }
-#map = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
-#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
 
 // CHECK-LABEL: func @distribute_inner_tiled_I8_16x16x32_I32
 //  CHECK-SAME:   %[[LHS:[A-Za-z0-9]+]]: tensor<2x2x16x32xi8>
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/unroll_multi_mma.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/unroll_multi_mma.mlir
index 21becfe..523a249 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/unroll_multi_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/unroll_multi_mma.mlir
@@ -92,8 +92,8 @@
   }
 }
 
-//    CHECK-LABEL: func @unroll_multi_mma_count
-// CHECK-COUNT-30:   %[[MMA:.+]] = iree_codegen.inner_tiled {{.*}} : vector<1x1x4xf16>, vector<1x1x4xf16> into vector<1x1x4xf32>
+//   CHECK-LABEL: func @unroll_multi_mma_count
+// CHECK-COUNT-30:   {{.+}} = iree_codegen.inner_tiled {{.*}} : vector<1x1x4xf16>, vector<1x1x4xf16> into vector<1x1x4xf32>
 // CHECK-COUNT-10:   vector.insert_strided_slice {{.*}} : vector<1x1x4xf32> into vector<2x5x4xf32>
 
 // -----
@@ -130,7 +130,7 @@
 }
 
 // CHECK-LABEL: func @unroll_scaled_multi_mma
-// CHECK-SAME: %[[LHS_SCALE:[A-Za-z0-9]+]]: vector<1x2x1xf8E8M0FNU>
+//  CHECK-SAME:   %[[LHS_SCALE:[A-Za-z0-9]+]]: vector<1x2x1xf8E8M0FNU>
 // CHECK-COUNT-2: vector.extract_strided_slice %[[LHS_SCALE]] {offsets = [0, 0]
 // CHECK-NOT: vector.extract_strided_slice %[[LHS_SCALE]] {offsets = [0, 0]
 // CHECK-COUNT-2: vector.extract_strided_slice %[[LHS_SCALE]] {offsets = [0, 1]
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_inner_tiled_to_lanes.mlir b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_inner_tiled_to_lanes.mlir
index 02b9f2b..6a79c59 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_inner_tiled_to_lanes.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/test/distribute_inner_tiled_to_lanes.mlir
@@ -1185,9 +1185,9 @@
 }
 
 // CHECK-LABEL: func @fuse_producer_slice
-// CHECK      :   scf.forall (%[[LANEID:.+]]) in (64) shared_outs(%[[ACC:.+]] = {{.*}}) -> (tensor<4x1x16x16xf32>)
-// CHECK      :     %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC]]
-// CHECK      :     %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[ACC_SLICE]] : tensor<4x1x4x1xf32>) -> tensor<4x1x4x1xf32>
-// CHECK      :     iree_codegen.inner_tiled
-// CHECK-SAME :     outs(%[[FILL]])
-// CHECK      :     mapping = [#iree_gpu.lane_id<0>]
+//       CHECK:   scf.forall ({{.+}}) in (64) shared_outs(%[[ACC:.+]] = {{.*}}) -> (tensor<4x1x16x16xf32>)
+//       CHECK:     %[[ACC_SLICE:.+]] = tensor.extract_slice %[[ACC]]
+//       CHECK:     %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[ACC_SLICE]] : tensor<4x1x4x1xf32>) -> tensor<4x1x4x1xf32>
+//       CHECK:     iree_codegen.inner_tiled
+//  CHECK-SAME:     outs(%[[FILL]])
+//       CHECK:     mapping = [#iree_gpu.lane_id<0>]
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/canonicalize.mlir b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/canonicalize.mlir
index 0775a34..195b987 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/canonicalize.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/canonicalize.mlir
@@ -136,14 +136,14 @@
   return %out : vector<64x1xf16>
 }
 
-// CHECK-LABEL: transfer_gather_fold_single_element
+// CHECK-LABEL: @transfer_gather_fold_single_element
 // CHECK-SAME: %{{.*}}: vector<1xindex>, %[[ARG1:.*]]: vector<64x1xindex>
 // CHECK: transfer_gather
 // CHECK-SAME: [None, %[[ARG1]]
 
 // -----
 
-func.func @transfer_gather_fold_contigious_load(%scalar: vector<64x1xindex>,
+func.func @transfer_gather_fold_contiguous_load(%scalar: vector<64x1xindex>,
   %indices: vector<64x1xindex>,
   %source: tensor<4096x64xf16>)
   -> vector<64x1xf16> {
@@ -157,8 +157,6 @@
   return %out : vector<64x1xf16>
 }
 
-// CHECK-LABEL: @transfer_gather_fold_contigious_load
+// CHECK-LABEL: @transfer_gather_fold_contiguous_load
 // CHECK: vector.transfer_read
 // CHECK-NOT: transfer_gather
-
-// -----
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/invalid.mlir b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/invalid.mlir
index d1c159f..d67b4fb 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/invalid.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/invalid.mlir
@@ -10,13 +10,10 @@
   subgroup_strides = [0, 0],
   thread_strides = [0, 0]>
 
-func.func @invalid_layout(%lhs: memref<32x32xf16>, %rhs: memref<32x32xf16>) -> vector<32x32xf16> {
-  %cst_0 = arith.constant 0.0 : f16
-  %c0 = arith.constant 0 : index
-  %result = vector.transfer_read %lhs[%c0, %c0], %cst_0 {in_bounds = [true, true]} : memref<32x32xf16>, vector<32x32xf16>
+func.func @invalid_layout(%arg0: vector<32x32xf16>) -> vector<32x32xf16> {
   // expected-error @+1 {{Vector shape: [32, 32] does not match the layout (nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [1, 1], element_tile = [1, 1], subgroup_strides = [0, 0], thread_strides = [0, 0]>) at dim 0. Dimension expected by layout: 1 actual: 32}}
-  %2 = iree_vector_ext.to_layout %result to layout(#layout1) : vector<32x32xf16>
-  return %2 : vector<32x32xf16>
+  %0 = iree_vector_ext.to_layout %arg0 to layout(#layout1) : vector<32x32xf16>
+  return %0 : vector<32x32xf16>
 }
 
 // -----
@@ -69,7 +66,7 @@
 
 // -----
 
-func.func @indexing_map_mismatch(%indices: vector<128x64xindex>,
+func.func @indexing_map_invalid_index_vector_shape(%indices: vector<128x64xindex>,
   %source: tensor<128x64xf16>)
   -> vector<128x64xf16> {
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/roundtrip.mlir b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/roundtrip.mlir
index 90bf3fe..2117d83 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/roundtrip.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR/test/roundtrip.mlir
@@ -108,16 +108,16 @@
   func.return %simd : vector<64x64xf16>
 }
 // CHECK-LABEL: func.func @to_simd_op
-// CHECK:      iree_vector_ext.to_simd
+// CHECK:      iree_vector_ext.to_simd %{{.+}} : vector<4x4x4xf16> -> vector<64x64xf16>
 
 // -----
 
 func.func @to_simt_op(%simd: vector<64x64xf32>) -> vector<4x4x4xf32> {
-  %simt = iree_vector_ext.to_simd %simd : vector<64x64xf32> -> vector<4x4x4xf32>
+  %simt = iree_vector_ext.to_simt %simd : vector<64x64xf32> -> vector<4x4x4xf32>
   func.return %simt : vector<4x4x4xf32>
 }
 // CHECK-LABEL: func.func @to_simt_op
-// CHECK:      iree_vector_ext.to_simd
+// CHECK:      iree_vector_ext.to_simt %{{.+}} : vector<64x64xf32> -> vector<4x4x4xf32>
 
 // -----
 
@@ -125,17 +125,17 @@
   %simd = iree_vector_ext.to_simd %simt : vector<f16> -> vector<f16>
   func.return %simd : vector<f16>
 }
-// CHECK-LABEL: func.func @to_simd_op
-// CHECK:      iree_vector_ext.to_simd
+// CHECK-LABEL: func.func @to_simd_op_0d
+// CHECK:      iree_vector_ext.to_simd %{{.+}} : vector<f16> -> vector<f16>
 
 // -----
 
 func.func @to_simt_op_0d(%simd: vector<f32>) -> vector<f32> {
-  %simt = iree_vector_ext.to_simd %simd : vector<f32> -> vector<f32>
+  %simt = iree_vector_ext.to_simt %simd : vector<f32> -> vector<f32>
   func.return %simt : vector<f32>
 }
-// CHECK-LABEL: func.func @to_simt_op
-// CHECK:      iree_vector_ext.to_simd
+// CHECK-LABEL: func.func @to_simt_op_0d
+// CHECK:      iree_vector_ext.to_simt %{{.+}} : vector<f32> -> vector<f32>
 
 // -----
 
@@ -177,5 +177,15 @@
   return %out, %out1, %out2, %out3 : vector<128x64xf16>, vector<128x64xf16>, vector<128x64xf16>, vector<128x64xf16>
 }
 
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d1)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
+// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
+
 // CHECK-LABEL: func.func @transfer_gather
-// CHECK: iree_vector_ext.transfer_gather
+// CHECK-SAME:    %[[INDICES0:.+]]: vector<128xindex>, %[[INDICES1:.+]]: vector<64xindex>, %[[INDICES2:.+]]: vector<128x64xindex>, %[[SOURCE:.+]]: tensor<4096x64xf16>
+// CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG:   %[[PAD:.+]] = arith.constant 0.000000e+00 : f16
+// CHECK:       iree_vector_ext.transfer_gather %[[SOURCE]][%[[C0]], %[[C0]]][None, %[[INDICES1]]: vector<64xindex>], %[[PAD]] {indexed_maps = [#[[$MAP0]]]} : tensor<4096x64xf16>, vector<128x64xf16>
+// CHECK:       iree_vector_ext.transfer_gather %[[SOURCE]][%[[C0]], %[[C0]]][%[[INDICES0]]: vector<128xindex>, None], %[[PAD]] {indexed_maps = [#[[$MAP1]]]} : tensor<4096x64xf16>, vector<128x64xf16>
+// CHECK:       iree_vector_ext.transfer_gather %[[SOURCE]][%[[C0]], %[[C0]]][%[[INDICES0]]: vector<128xindex>, %[[INDICES1]]: vector<64xindex>], %[[PAD]] {indexed_maps = [#[[$MAP1]], #[[$MAP0]]]} : tensor<4096x64xf16>, vector<128x64xf16>
+// CHECK:       iree_vector_ext.transfer_gather %[[SOURCE]][%[[C0]], %[[C0]]][None, %[[INDICES2]]: vector<128x64xindex>], %[[PAD]] {indexed_maps = [#[[$MAP2]]]} : tensor<4096x64xf16>, vector<128x64xf16>
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vector_ext_fold_unit_extent_dims.mlir b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vector_ext_fold_unit_extent_dims.mlir
index a3d5693..bce77fb 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vector_ext_fold_unit_extent_dims.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vector_ext_fold_unit_extent_dims.mlir
@@ -17,8 +17,10 @@
 }
 
 // CHECK-LABEL: func.func @dynamic_shape
-// CHECK-DAG: %[[DIM:.+]] = tensor.dim %arg0, %c3 : tensor<1x1x128x?xf16>
-// CHECK-DAG: %[[SLICE:.+]] = tensor.extract_slice %arg0[0, 0, 0, 0] [1, 1, 128, %[[DIM]]] [1, 1, 1, 1] : tensor<1x1x128x?xf16> to tensor<128x?xf16>
-// CHECK-DAG: %[[LAYOUT:.+]] = iree_vector_ext.to_layout %[[SLICE]]
-// CHECK-DAG: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<1x1x128x?xf16>
-// CHECK-DAG: %[[INSERT_SLICE:.+]] = tensor.insert_slice %[[LAYOUT]] into %[[EMPTY]][0, 0, 0, 0] [1, 1, 128, %dim] [1, 1, 1, 1] : tensor<128x?xf16> into tensor<1x1x128x?xf16>
+// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
+// CHECK-DAG: %[[DIM:.+]] = tensor.dim %arg0, %[[C3]] : tensor<1x1x128x?xf16>
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %arg0[0, 0, 0, 0] [1, 1, 128, %[[DIM]]] [1, 1, 1, 1] : tensor<1x1x128x?xf16> to tensor<128x?xf16>
+// CHECK: %[[LAYOUT:.+]] = iree_vector_ext.to_layout %[[SLICE]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<1x1x128x?xf16>
+// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[LAYOUT]] into %[[EMPTY]][0, 0, 0, 0] [1, 1, 128, %[[DIM]]] [1, 1, 1, 1] : tensor<128x?xf16> into tensor<1x1x128x?xf16>
+// CHECK: return %[[INSERT]] : tensor<1x1x128x?xf16>
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vectorize_vector_ext_ops.mlir b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vectorize_vector_ext_ops.mlir
index c0b29bd..d734cdb 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vectorize_vector_ext_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Dialect/VectorExt/Transforms/test/vectorize_vector_ext_ops.mlir
@@ -65,15 +65,17 @@
 
 // CHECK-LABEL: func.func @vectorize_matmul_dyn_parallel
 // CHECK-SAME: %[[AT:.+]]: tensor<?x64xf32>, %[[BT:.+]]: tensor<64x?xf32>, %[[CT:.+]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
 // CHECK-DAG: %[[PAD:.+]] = ub.poison : f32
-// CHECK-DAG: %[[ADIM:.+]] = tensor.dim %arg0, %c0 : tensor<?x64xf32>
-// CHECK-DAG: %[[BDIM:.+]] = tensor.dim %arg1, %c1 : tensor<64x?xf32>
-// CHECK-DAG: %[[AMASK:.+]] = vector.create_mask %[[ADIM]], %c64 : vector<64x64xi1>
-// CHECK-DAG: %[[AV:.+]] = vector.transfer_read %arg0[%c0, %c0], %[[PAD]], %[[AMASK]]
-// CHECK-DAG: %[[A:.+]]  = iree_vector_ext.to_layout %[[AV]] to layout(#iree_vector_ext.nested_layout<subgroup_tile = [1, 1], batch_tile = [1, 1], outer_tile = [1, 1], thread_tile = [1, 1], element_tile = [64, 64], subgroup_strides = [0, 0], thread_strides = [0, 0]>) : vector<64x64xf32>
-
-// CHECK-DAG: %[[OPMASK:.+]]  = vector.create_mask %[[ADIM]], %[[BDIM]], %c64 : vector<64x64x64xi1>
-// CHECK-DAG: vector.mask %[[OPMASK]] { vector.contract {{.*}} %[[A]]
+// CHECK-DAG: %[[ADIM:.+]] = tensor.dim %[[AT]], %{{.+}} : tensor<?x64xf32>
+// CHECK-DAG: %[[BDIM:.+]] = tensor.dim %[[BT]], %{{.+}} : tensor<64x?xf32>
+// CHECK-DAG: %[[AMASK:.+]] = vector.create_mask %[[ADIM]], %[[C64]] : vector<64x64xi1>
+// CHECK-DAG: %[[AV:.+]] = vector.transfer_read %[[AT]][%{{.+}}, %{{.+}}], %[[PAD]], %[[AMASK]]
+// CHECK-DAG: %[[A:.+]] = iree_vector_ext.to_layout %[[AV]] to layout({{.+}}) : vector<64x64xf32>
+// CHECK-DAG: %[[BV:.+]] = vector.transfer_read %[[BT]][%{{.+}}, %{{.+}}], %[[PAD]], %{{.+}}
+// CHECK-DAG: %[[B:.+]] = iree_vector_ext.to_layout %[[BV]] to layout({{.+}}) : vector<64x64xf32>
+// CHECK-DAG: %[[OPMASK:.+]] = vector.create_mask %[[ADIM]], %[[BDIM]], %[[C64]] : vector<64x64x64xi1>
+// CHECK: vector.mask %[[OPMASK]] { vector.contract {{.*}} %[[A]], %[[B]], %{{.+}}
 
 // -----