Integrate llvm-project @864902e9b4d8bc6d3f0852d5c475e3dc97dd8335 (#18843)

bump llvm-project to
https://github.com/llvm/llvm-project/commit/864902e9b4d8bc6d3f0852d5c475e3dc97dd8335

Main changes in IREE (causing test updates):
- New forall loop canonicalizations are added which fold induction
variables. This can canonicalilze away some index computation, and fold
some extract_slice ops, so a few pipeline tests are changed.
- A new option for `hoistRedundantVectorTransfers` (`verifyNonZeroTrip`)
is set, which prevents hoisting from possibly zero trip loops. This
option is flipped in this integrate because it fixes a bug exposed by
forall loop canonicalizations.

---------

Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
index f300291..ebaabcc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
@@ -91,7 +91,8 @@
       // Workaround, run loop invariant code motion before hoist redundant
       // vector transfer to workaround a bug upstream.
       loopInvariantCodeMotion(funcOp);
-      linalg::hoistRedundantVectorTransfers(cast<func::FuncOp>(funcOp));
+      linalg::hoistRedundantVectorTransfers(cast<func::FuncOp>(funcOp),
+                                            /*verifyNonZeroTrip=*/true);
     }
     IRRewriter rewriter(funcOp->getContext());
     vector::transferOpflowOpt(rewriter, funcOp);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
index 66ac37b..0dc8b0f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
@@ -1013,11 +1013,8 @@
 //       CHECK:   %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c1000 step %c4 {{.*}} -> (vector<1x4xf32>)
 //       CHECK:     gpu.barrier
 
-// TODO: The fact that this read gets hoisted out of the subsequent for loop
-// is a bug in LICM that does no verification that the loop has at least one
-// trip.
-//       CHECK:     %[[LHS_RD:.+]] = vector.transfer_read %[[B0]]{{.*}} vector<4xf32>
 //       CHECK:     scf.for %{{.*}} = %{{.*}} to %c1 step %c32
+//       CHECK:       %[[LHS_RD:.+]] = vector.transfer_read %[[B0]]{{.*}} vector<4xf32>
 //  CHECK-NEXT:       vector.transfer_write %[[LHS_RD]], %[[LHS_ALLOC]]
 //       CHECK:     gpu.barrier
 //   CHECK-DAG:     %[[LHS_MM:.+]] = vector.transfer_read %[[LHS_ALLOC]]{{.*}} vector<4xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
index cfa8875..daeaa22 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
@@ -49,7 +49,6 @@
   // FOREACH-TO-GPU: %[[COND:.*]]  = arith.andi %[[LT1]], %[[LT5]] : i1
   // FOREACH-TO-GPU: scf.if %[[COND]] {
   // FOREACH-TO-GPU:   affine.apply #{{.*}}()[%[[TIDY]]]
-  // FOREACH-TO-GPU:   affine.apply #{{.*}}()[%[[TIDX]]]
   // FOREACH-TO-GPU:   linalg.fill
   // FOREACH-TO-GPU: }
   // FOREACH-TO-GPU: gpu.barrier
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index d97410b..5cc0b70 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
@@ -109,13 +109,13 @@
 //       CHECK:     gpu.barrier
 //       CHECK:     scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
 //       CHECK:       %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Y]], 0]
-//       CHECK:       %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
-//       CHECK:       %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
 //       CHECK:       scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]]
 //       CHECK:         %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][0, %[[IV_X]]]
-//       CHECK:         %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
-//       CHECK:         %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
-//       CHECK:         %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
+//   CHECK-DAG:         %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
+//   CHECK-DAG:         %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
 //       CHECK:         %[[CT0:.+]] = vector.contract
 //  CHECK-SAME:           %[[READ0]], %[[READ2]], %[[READ4]] : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
 //       CHECK:         %[[CT1:.+]] = vector.contract
@@ -246,13 +246,13 @@
 //       CHECK:     scf.for %[[IV_Z:.+]] = %[[ID_Z]] to %[[C1]] step %[[C1]]
 //       CHECK:       scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
 //       CHECK:         %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Z]], %[[IV_Y]], 0] [1, 16, 32]
-//       CHECK:         %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
-//       CHECK:         %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C16]]]
 //       CHECK:         scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]] {
 //       CHECK:           %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][%[[IV_Z]], 0, %[[IV_X]]] [1, 32, 16]
-//       CHECK:           %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
-//       CHECK:           %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C16]], %[[C0]]]
-//       CHECK:           %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]], %[[C0]]]
+//     CHECK-DAG:         %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
+//     CHECK-DAG:         %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]], %[[C16]]]
+//     CHECK-DAG:         %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]], %[[C0]]]
+//     CHECK-DAG:         %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C16]], %[[C0]]]
+//     CHECK-DAG:         %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]], %[[C0]]]
 //       CHECK:           %[[CT0:.+]] = vector.contract
 //  CHECK-SAME:             %[[READ0]], %[[READ2]], %[[READ4]] : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
 //       CHECK:           %[[CT1:.+]] = vector.contract
@@ -369,13 +369,13 @@
 //       CHECK:     gpu.barrier
 //       CHECK:     scf.for %[[IV_Y:.+]] = %[[OFFSET_Y]] to %[[C32]] step %[[C32]]
 //       CHECK:       %[[LHS_VIEW:.+]] = memref.subview %[[LHS_ALLOC]][%[[IV_Y]], 0]
-//       CHECK:       %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
-//       CHECK:       %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
 //       CHECK:       scf.for %[[IV_X:.+]] = %[[OFFSET_X]] to %[[C32]] step %[[C32]]
 //       CHECK:         %[[RHS_VIEW:.+]] = memref.subview %[[RHS_ALLOC]][0, %[[IV_X]]]
-//       CHECK:         %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
-//       CHECK:         %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
-//       CHECK:         %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ0:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ1:.+]] = vector.transfer_read %[[LHS_VIEW]][%[[C0]], %[[C16]]]
+//   CHECK-DAG:         %[[READ2:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C0]], %[[C0]]]
+//   CHECK-DAG:         %[[READ3:.+]] = vector.transfer_read %[[RHS_VIEW]][%[[C16]], %[[C0]]]
+//   CHECK-DAG:         %[[READ4:.+]] = vector.transfer_read %{{.+}}[%[[C0]], %[[C0]]]
 //       CHECK:         %[[EXTUI0:.+]] = arith.extui %[[READ0]] : vector<16x16xi8> to vector<16x16xi32>
 //       CHECK:         %[[EXTUI1:.+]] = arith.extui %[[READ1]] : vector<16x16xi8> to vector<16x16xi32>
 //       CHECK:         %[[EXTSI0:.+]] = arith.extsi %[[READ2]] : vector<16x16xi8> to vector<16x16xi32>
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 922992a..864902e 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 922992a22f7c87c192cf96606038df3cf20d6404
+Subproject commit 864902e9b4d8bc6d3f0852d5c475e3dc97dd8335