Integrate llvm/llvm-project@c54064de80e93494d1d44550b56ce8f2f3cf9c4b (#16652)

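Bump LLVM to include
https://github.com/llvm/llvm-project/commit/205dce6029bed302f354c0bde5d8c5804f214051
and
https://github.com/llvm/llvm-project/commit/3f18f6a2cfecb080f006477c46d3626102841a17

Accompanying changes in this integrate:
- Update FileCheck patterns in gpu_nested_layout_vector_distribution.mlir and
  reduction_pipeline_transform_cuda.mlir to capture SSA values (for example
  `%[[TIDX:.+]]`) instead of hard-coding value numbers or matching a single
  character.
- Tighten the fusion check in FusionOfTensorOps.cpp: linalg producers are
  fused only when they are single-use, all-parallel `linalg.generic` ops, and
  linalg consumers only when they are all-parallel `linalg.generic` ops.
- Add the `PDLResultList &` parameter to the PDL constraint functions in
  StructuredTransformOpsExt.cpp.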
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
index 350b13c..54d9c99 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
@@ -444,8 +444,9 @@
   }
 }
 
-// CHECK: %[[IDS:.+]]:4 = affine.delinearize_index %0 into (%c1, %c1, %c4, %c8) : index, index, index, index
-// CHECK: %[[LANEY:.+]] = affine.apply #map()[%1#2]
+// CHECK: %[[TIDX:.+]] = gpu.thread_id  x
+// CHECK: %[[IDS:.+]]:4 = affine.delinearize_index %[[TIDX]] into (%c1, %c1, %c4, %c8) : index, index, index, index
+// CHECK: %[[LANEY:.+]] = affine.apply #map()[%[[IDS]]#2]
 // CHECK: vector.extract %{{.*}}[0, 0, 0, 0]
 // CHECK: vector.transpose %{{.*}}, [1, 0] : vector<1x4xf16> to vector<4x1xf16>
 // CHECK: vector.transfer_write %{{.*}}[%[[LANEY]], %[[IDS]]#3]
@@ -501,7 +502,7 @@
 // CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[I0]], %[[LIN_ID0]]] {{.*}} permutation_map = #[[$MAP1]]
 // CHECK: %[[LIN_ID1:.+]] = affine.apply #[[$MAP2]]()[%[[I0]]]
 // CHECK: vector.extract %{{.*}}[1, 0, 0, 0]
-// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[LIN_ID1]], %3] {{.*}} permutation_map = #[[$MAP1]]
+// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[LIN_ID1]], %[[LIN_ID0]]] {{.*}} permutation_map = #[[$MAP1]]
 // CHECK: %[[LIN_ID2:.+]] = affine.apply #[[$MAP3]]()[%[[IDS]]#2, %[[I1]]]
 // CHECK: vector.extract %{{.*}}[0, 1, 0, 0]
 // CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[I0]], %[[LIN_ID2]]] {{.*}} permutation_map = #[[$MAP1]]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
index b16675f..4fdb9dd 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
@@ -78,7 +78,7 @@
 //     CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //     CHECK-DAG:   %[[C32:.*]] = arith.constant 32 : index
 //     CHECK-DAG:   %[[workgroup_id_x:.*]] = hal.interface.workgroup.id[0] : index
-//     CHECK-DAG:   %[[TIDX:.]] = gpu.thread_id  x
+//     CHECK-DAG:   %[[TIDX:.+]] = gpu.thread_id  x
 
 // No allocation created for the per thread data.
 //     CHECK-NOT:   memref.alloc()
@@ -153,7 +153,7 @@
 // Fusion occurred, no barrier before the loop
 //     CHECK-NOT: gpu.barrier
 // Local per-thread scf.for-based reduction.
-//         CHECK: %[[TIDX:.]] = gpu.thread_id  x
+//         CHECK: %[[TIDX:.+]] = gpu.thread_id  x
 //         CHECK: %[[v:.*]] = scf.for {{.*}} -> (vector<1xf32>)
 //         CHECK:   vector.transfer_read {{.*}} vector<1xf32>
 //         CHECK:   arith.addf{{.*}} : vector<1xf32>
@@ -216,7 +216,7 @@
 // Fusion occurred, no barrier before the loop
 //     CHECK-NOT: gpu.barrier
 // Local per-thread scf.for-based reduction.
-//         CHECK: %[[TIDX:.]] = gpu.thread_id  x
+//         CHECK: %[[TIDX:.+]] = gpu.thread_id  x
 //         CHECK: %[[TIDX_TIMES_4:.]] = affine.apply{{.*}}[%[[TIDX]]]
 //         CHECK: scf.for {{.*}} -> (vector<1xf32>) {
 //         CHECK:   vector.transfer_read {{.*}} vector<4xf32>
@@ -366,7 +366,7 @@
 
 //     CHECK-DAG: %[[ALLOC0:.+]] = memref.alloc() {alignment = 64 : i64} : memref<1xi8, #gpu.address_space<workgroup>>
 // Local per-thread scf.for-based reduction.
-//         CHECK: %[[TIDX:.]] = gpu.thread_id  x
+//         CHECK: %[[TIDX:.+]] = gpu.thread_id  x
 //         CHECK: scf.for {{.*}} -> (vector<1xi8>)
 //         CHECK:   vector.transfer_read {{.*}} vector<1xi8>
 //         CHECK:   arith.addi{{.*}} : vector<1xi8>
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
index d990f1c..3b92212 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
@@ -395,10 +395,25 @@
               return false;
             }
 
-            // Do not fuse producer generic op if it has more than one user.
+            // Do not fuse producer generic op if it has more than one user
+            // or any reduction iterators.
             if (auto producerGenericOp =
                     dyn_cast<linalg::GenericOp>(producer)) {
-              return producerGenericOp->hasOneUse();
+              return producerGenericOp->hasOneUse() &&
+                     llvm::all_of(producerGenericOp.getIteratorTypesArray(),
+                                  linalg::isParallelIterator);
+            }
+
+            // Do not fuse with any producer linalg named ops for now.
+            if (isa<linalg::LinalgOp>(producer)) {
+              return false;
+            }
+
+            // Do not fuse with consumer linalg named ops or reductions.
+            if (auto consumerLinalgOp = dyn_cast<linalg::LinalgOp>(consumer)) {
+              return isa<linalg::GenericOp>(consumerLinalgOp) &&
+                     llvm::all_of(consumerLinalgOp.getIteratorTypesArray(),
+                                  linalg::isParallelIterator);
             }
             // Fuse in all other cases.
             return true;
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index 828d9cc..e8ee911 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -56,6 +56,7 @@
 /// (`pdlValues[1]`).
 /// TODO: PDL needs user-defined "questions".
 static LogicalResult nestedInFunc(PatternRewriter &rewriter,
+                                  PDLResultList &pdlResults,
                                   ArrayRef<PDLValue> pdlValues) {
   assert(pdlValues.size() == 2 && "expected 2 PDL values");
   Operation *operation = pdlValues[0].cast<Operation *>();
@@ -167,6 +168,7 @@
 ///           then the bodies would be equivalent (really isomorphic).
 ///   3. other cases TBD (e.g. vector.generic when available).
 static LogicalResult isEquivalentToOp(PatternRewriter &rewriter,
+                                      PDLResultList &pdlResults,
                                       ArrayRef<PDLValue> pdlValues) {
   assert(pdlValues.size() == 2 && "expected 2 PDL values");
   Operation *operation = pdlValues[0].cast<Operation *>();
@@ -207,6 +209,7 @@
 /// Note: 0 is the convention to express "do not tile", it is considered to
 /// divide everything.
 static LogicalResult isDimMultipleOf(PatternRewriter &rewriter,
+                                     PDLResultList &pdlResults,
                                      ArrayRef<PDLValue> pdlValues) {
   assert(pdlValues.size() == 2 && "expected 2 PDL values");
   ValueRange operands = pdlValues[0].cast<ValueRange>();
@@ -250,6 +253,7 @@
 /// Succeed if `value`[`operand_number`] is a ranked type whose `dim` is
 /// dynamic.
 static LogicalResult isDimStatic(PatternRewriter &rewriter,
+                                 PDLResultList &pdlResults,
                                  ArrayRef<PDLValue> pdlValues) {
   assert(pdlValues.size() == 2 && "expected 2 PDL values");
   ValueRange operands = pdlValues[0].cast<ValueRange>();
@@ -284,6 +288,7 @@
 /// Succeed if `value`[`operand_number`] is a ranked type whose `dim` is
 /// dynamic.
 static LogicalResult isDimDynamic(PatternRewriter &rewriter,
+                                  PDLResultList &pdlResults,
                                   ArrayRef<PDLValue> pdlValues) {
   assert(pdlValues.size() == 2 && "expected 2 PDL values");
   ValueRange operands = pdlValues[0].cast<ValueRange>();
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 4df364b..c54064d 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 4df364bc93af49ae413ec1ae8328f34ac70730c4
+Subproject commit c54064de80e93494d1d44550b56ce8f2f3cf9c4b