Integrate llvm-project @864902e9b4d8bc6d3f0852d5c475e3dc97dd8335 (#18843)
bump llvm-project to
https://github.com/llvm/llvm-project/commit/864902e9b4d8bc6d3f0852d5c475e3dc97dd8335
Main changes in IREE (causing test updates):
- New forall loop canonicalizations are added which fold induction
variables. This can canonicalilze away some index computation, and fold
some extract_slice ops, so a few pipeline tests are changed.
- A new option for `hoistRedundantVectorTransfers` (`verifyNonZeroTrip`)
is set, which prevents hoisting from possibly zero trip loops. This
option is flipped in this integrate because it fixes a bug exposed by
forall loop canonicalizations.
---------
Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
index f300291..ebaabcc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
@@ -91,7 +91,8 @@
// Workaround, run loop invariant code motion before hoist redundant
// vector transfer to workaround a bug upstream.
loopInvariantCodeMotion(funcOp);
- linalg::hoistRedundantVectorTransfers(cast<func::FuncOp>(funcOp));
+ linalg::hoistRedundantVectorTransfers(cast<func::FuncOp>(funcOp),
+ /*verifyNonZeroTrip=*/true);
}
IRRewriter rewriter(funcOp->getContext());
vector::transferOpflowOpt(rewriter, funcOp);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
index 66ac37b..0dc8b0f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
@@ -1013,11 +1013,8 @@
// CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c1000 step %c4 {{.*}} -> (vector<1x4xf32>)
// CHECK: gpu.barrier
-// TODO: The fact that this read gets hoisted out of the subsequent for loop
-// is a bug in LICM that does no verification that the loop has at least one
-// trip.
-// CHECK: %[[LHS_RD:.+]] = vector.transfer_read %[[B0]]{{.*}} vector<4xf32>
// CHECK: scf.for %{{.*}} = %{{.*}} to %c1 step %c32
+// CHECK: %[[LHS_RD:.+]] = vector.transfer_read %[[B0]]{{.*}} vector<4xf32>
// CHECK-NEXT: vector.transfer_write %[[LHS_RD]], %[[LHS_ALLOC]]
// CHECK: gpu.barrier
// CHECK-DAG: %[[LHS_MM:.+]] = vector.transfer_read %[[LHS_ALLOC]]{{.*}} vector<4xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
index cfa8875..daeaa22 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
@@ -49,7 +49,6 @@
// FOREACH-TO-GPU: %[[COND:.*]] = arith.andi %[[LT1]], %[[LT5]] : i1
// FOREACH-TO-GPU: scf.if %[[COND]] {
// FOREACH-TO-GPU: affine.apply #{{.*}}()[%[[TIDY]]]
- // FOREACH-TO-GPU: affine.apply #{{.*}}()[%[[TIDX]]]
// FOREACH-TO-GPU: linalg.fill
// FOREACH-TO-GPU: }
// FOREACH-TO-GPU: gpu.barrier
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index d97410b..5cc0b70 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
@@ -109,13 +109,13 @@
// CHECK: gpu.barrier
// CHECK: scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
// CHECK: %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Y]], 0]
-// CHECK: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
-// CHECK: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
// CHECK: scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]]
// CHECK: %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][0, %[[IV_X]]]
-// CHECK: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
-// CHECK: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
-// CHECK: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
+// CHECK-DAG: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
+// CHECK-DAG: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
// CHECK: %[[CT0:.+]] = vector.contract
// CHECK-SAME: %[[READ0]], %[[READ2]], %[[READ4]] : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
// CHECK: %[[CT1:.+]] = vector.contract
@@ -246,13 +246,13 @@
// CHECK: scf.for %[[IV_Z:.+]] = %[[ID_Z]] to %[[C1]] step %[[C1]]
// CHECK: scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
// CHECK: %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Z]], %[[IV_Y]], 0] [1, 16, 32]
-// CHECK: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
-// CHECK: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C16]]]
// CHECK: scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]] {
// CHECK: %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][%[[IV_Z]], 0, %[[IV_X]]] [1, 32, 16]
-// CHECK: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
-// CHECK: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C16]], %[[C0]]]
-// CHECK: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C16]]]
+// CHECK-DAG: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C16]], %[[C0]]]
+// CHECK-DAG: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]], %[[C0]]]
// CHECK: %[[CT0:.+]] = vector.contract
// CHECK-SAME: %[[READ0]], %[[READ2]], %[[READ4]] : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
// CHECK: %[[CT1:.+]] = vector.contract
@@ -369,13 +369,13 @@
// CHECK: gpu.barrier
// CHECK: scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
// CHECK: %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Y]], 0]
-// CHECK: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
-// CHECK: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
// CHECK: scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]]
// CHECK: %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][0, %[[IV_X]]]
-// CHECK: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
-// CHECK: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
-// CHECK: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
+// CHECK-DAG: %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
+// CHECK-DAG: %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
+// CHECK-DAG: %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
// CHECK: %[[EXTUI0:.+]] = arith.extui %[[READ0]] : vector<16x16xi8> to vector<16x16xi32>
// CHECK: %[[EXTUI1:.+]] = arith.extui %[[READ1]] : vector<16x16xi8> to vector<16x16xi32>
// CHECK: %[[EXTSI0:.+]] = arith.extsi %[[READ2]] : vector<16x16xi8> to vector<16x16xi32>
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 922992a..864902e 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 922992a22f7c87c192cf96606038df3cf20d6404
+Subproject commit 864902e9b4d8bc6d3f0852d5c475e3dc97dd8335