Clean up lit test for vector.contract -> aarch64 asm (#7705)
There are some extra ops in the input here that aren't required for the
test.
diff --git a/iree/compiler/Codegen/LLVMCPU/test/vector_contract_to_aarch64_asm.mlir b/iree/compiler/Codegen/LLVMCPU/test/vector_contract_to_aarch64_asm.mlir
index d3f86e3..5495e67 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/vector_contract_to_aarch64_asm.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/vector_contract_to_aarch64_asm.mlir
@@ -1,42 +1,39 @@
// RUN: iree-opt -iree-llvmcpu-vector-to-aarch64-inline-asm %s | IreeFileCheck %s
-func @vector_matmul_to_aarch64_asm_vec_dot(%lhs: memref<4x4xi8>, %rhs: memref<4x4xi8>, %dst: memref<4x4xi32>) {
- %c0 = arith.constant 0 : index
- %cst_i8_0 = arith.constant 0 : i8
- %cst_i32_0 = arith.constant 0 : i32
- %0 = vector.transfer_read %lhs[%c0, %c0] , %cst_i8_0 {in_bounds = [false, false]} : memref<4x4xi8>, vector<4x4xi8>
- %1 = vector.transfer_read %rhs[%c0, %c0] , %cst_i8_0 {in_bounds = [false, false]} : memref<4x4xi8>, vector<4x4xi8>
- %2 = vector.transfer_read %dst[%c0, %c0], %cst_i32_0 {in_bounds = [false, false]} : memref<4x4xi32>, vector<4x4xi32>
- %3 = arith.extsi %0 : vector<4x4xi8> to vector<4x4xi32>
- %4 = arith.extsi %1 : vector<4x4xi8> to vector<4x4xi32>
- %5 = vector.contract {
- indexing_maps = [
- affine_map<(d0, d1, d2) -> (d0, d2)>,
- affine_map<(d0, d1, d2) -> (d2, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>
- ], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>
- } %3, %4, %2 : vector<4x4xi32>, vector<4x4xi32> into vector<4x4xi32>
- vector.transfer_write %5, %dst[%c0, %c0] {in_bounds = [false, false]}: vector<4x4xi32>, memref<4x4xi32>
- return
+// CHECK-LABEL: @vector_i8i8i32matmul_to_aarch64_asm_vec_dot(
+func @vector_i8i8i32matmul_to_aarch64_asm_vec_dot(
+ // CHECK-SAME: %[[LHS:[a-zA-Z0-9_]+]]
+ %lhs: vector<4x4xi8>,
+ // CHECK-SAME: %[[RHS:[a-zA-Z0-9_]+]]
+ %rhs: vector<4x4xi8>,
+ // CHECK-SAME: %[[ACC:[a-zA-Z0-9_]+]]
+ %acc: vector<4x4xi32>) -> vector<4x4xi32> {
+ %lhs_wide = arith.extsi %lhs : vector<4x4xi8> to vector<4x4xi32>
+ %rhs_wide = arith.extsi %rhs : vector<4x4xi8> to vector<4x4xi32>
+ // CHECK-DAG: %[[RES_2D:.+]] = arith.constant dense<0> : vector<4x4xi32>
+ // CHECK-DAG: %[[DST0:.+]] = vector.extract %[[ACC]][0] : vector<4x4xi32>
+ // CHECK-DAG: %[[DST1:.+]] = vector.extract %[[ACC]][1] : vector<4x4xi32>
+ // CHECK-DAG: %[[DST2:.+]] = vector.extract %[[ACC]][2] : vector<4x4xi32>
+ // CHECK-DAG: %[[DST3:.+]] = vector.extract %[[ACC]][3] : vector<4x4xi32>
+ // CHECK-DAG: %[[LHS_1D:.+]] = vector.shape_cast %[[LHS]] : vector<4x4xi8> to vector<16xi8>
+ // CHECK-DAG: %[[RHS_T_2d:.+]] = vector.transpose %[[RHS]], [1, 0]
+ // CHECK-DAG: %[[RHS_T:.+]] = vector.shape_cast %[[RHS_T_2d]] : vector<4x4xi8> to vector<16xi8>
+ // CHECK: %[[ASM_RESULT:.+]] = llvm.inline_asm {{.*}} "=w,=w,=w,=w,w,w,0,1,2,3" %[[LHS_1D]], %[[RHS_T]], %[[DST0]], %[[DST1]], %[[DST2]], %[[DST3]]
+ // CHECK-DAG: %[[RES_0:.+]] = llvm.extractvalue %[[ASM_RESULT]][0] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
+ // CHECK-DAG: %[[RES_1:.+]] = llvm.extractvalue %[[ASM_RESULT]][1] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
+ // CHECK-DAG: %[[RES_2:.+]] = llvm.extractvalue %[[ASM_RESULT]][2] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
+ // CHECK-DAG: %[[RES_3:.+]] = llvm.extractvalue %[[ASM_RESULT]][3] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
+ // CHECK-DAG: %[[RES_2D_0:.+]] = vector.insert %[[RES_0]], %[[RES_2D]] [0] : vector<4xi32> into vector<4x4xi32>
+ // CHECK-DAG: %[[RES_2D_1:.+]] = vector.insert %[[RES_1]], %[[RES_2D_0]] [1] : vector<4xi32> into vector<4x4xi32>
+ // CHECK-DAG: %[[RES_2D_2:.+]] = vector.insert %[[RES_2]], %[[RES_2D_1]] [2] : vector<4xi32> into vector<4x4xi32>
+ // CHECK-DAG: %[[RES_2D_3:.+]] = vector.insert %[[RES_3]], %[[RES_2D_2]] [3] : vector<4xi32> into vector<4x4xi32>
+ %res = vector.contract {
+ indexing_maps = [
+ affine_map<(d0, d1, d2) -> (d0, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>
+ ], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>
+ } %lhs_wide, %rhs_wide, %acc : vector<4x4xi32>, vector<4x4xi32> into vector<4x4xi32>
+ // CHECK: return %[[RES_2D_3]]
+ return %res : vector<4x4xi32>
}
-// CHEC-LABEL: @vector_matmul_to_aarch64_asm_vec_dot
-// CHECK-DAG: %[[RES_2D:.+]] = arith.constant dense<0> : vector<4x4xi32>
-// CHECK-DAG: %[[LHS_2D:.+]] = vector.transfer_read
-// CHECK-DAG: %[[RHS_2d:.+]] = vector.transfer_read
-// CHECK-DAG: %[[DST:.+]] = vector.transfer_read
-// CHECK-DAG: %[[DST0:.+]] = vector.extract %[[DST]][0] : vector<4x4xi32>
-// CHECK-DAG: %[[DST1:.+]] = vector.extract %[[DST]][1] : vector<4x4xi32>
-// CHECK-DAG: %[[DST2:.+]] = vector.extract %[[DST]][2] : vector<4x4xi32>
-// CHECK-DAG: %[[DST3:.+]] = vector.extract %[[DST]][3] : vector<4x4xi32>
-// CHECK-DAG: %[[LHS:.+]] = vector.shape_cast %[[LHS_2D]] : vector<4x4xi8> to vector<16xi8>
-// CHECK: %[[RHS_T_2d:.+]] = vector.transpose %[[RHS_2d]], [1, 0]
-// CHECK: %[[RHS_T:.+]] = vector.shape_cast %[[RHS_T_2d]] : vector<4x4xi8> to vector<16xi8>
-// CHECK: %[[ASM_RESULT:.+]] = llvm.inline_asm {{.*}} "=w,=w,=w,=w,w,w,0,1,2,3" %[[LHS]], %[[RHS_T]], %[[DST0]], %[[DST1]], %[[DST2]], %[[DST3]]
-// CHECK-DAG: %[[RES_0:.+]] = llvm.extractvalue %[[ASM_RESULT]][0] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
-// CHECK-DAG: %[[RES_1:.+]] = llvm.extractvalue %[[ASM_RESULT]][1] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
-// CHECK-DAG: %[[RES_2:.+]] = llvm.extractvalue %[[ASM_RESULT]][2] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
-// CHECK-DAG: %[[RES_3:.+]] = llvm.extractvalue %[[ASM_RESULT]][3] : !llvm.struct<(vector<4xi32>, vector<4xi32>, vector<4xi32>, vector<4xi32>)>
-// CHECK-DAG: %[[RES_2D_0:.+]] = vector.insert %[[RES_0]], %[[RES_2D]] [0] : vector<4xi32> into vector<4x4xi32>
-// CHECK-DAG: %[[RES_2D_1:.+]] = vector.insert %[[RES_1]], %[[RES_2D_0]] [1] : vector<4xi32> into vector<4x4xi32>
-// CHECK-DAG: %[[RES_2D_2:.+]] = vector.insert %[[RES_2]], %[[RES_2D_1]] [2] : vector<4xi32> into vector<4x4xi32>
-// CHECK-DAG: %[[RES_2D_3:.+]] = vector.insert %[[RES_3]], %[[RES_2D_2]] [3] : vector<4xi32> into vector<4x4xi32>