Integrate llvm/llvm-project@c54064de80e93494d1d44550b56ce8f2f3cf9c4b (#16652)
Bump LLVM to include
https://github.com/llvm/llvm-project/commit/205dce6029bed302f354c0bde5d8c5804f214051
and
https://github.com/llvm/llvm-project/commit/3f18f6a2cfecb080f006477c46d3626102841a17
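The bump needs a few local adaptations, reflected in the diff below: the PDL native constraint helpers in StructuredTransformOpsExt.cpp gain a PDLResultList parameter to match the updated callback signature; the elementwise fusion control in FusionOfTensorOps.cpp is tightened so a producer generic is only fused when it has a single use and only parallel iterators, producer linalg named ops are not fused at all, and fusion with a linalg consumer is limited to purely parallel generic ops; and FileCheck patterns in the GPU vector distribution and reduction pipeline tests are updated to use named captures instead of hard-coded SSA numbers.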
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
index 350b13c..54d9c99 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir
@@ -444,8 +444,9 @@
}
}
-// CHECK: %[[IDS:.+]]:4 = affine.delinearize_index %0 into (%c1, %c1, %c4, %c8) : index, index, index, index
-// CHECK: %[[LANEY:.+]] = affine.apply #map()[%1#2]
+// CHECK: %[[TIDX:.+]] = gpu.thread_id x
+// CHECK: %[[IDS:.+]]:4 = affine.delinearize_index %[[TIDX]] into (%c1, %c1, %c4, %c8) : index, index, index, index
+// CHECK: %[[LANEY:.+]] = affine.apply #map()[%[[IDS]]#2]
// CHECK: vector.extract %{{.*}}[0, 0, 0, 0]
// CHECK: vector.transpose %{{.*}}, [1, 0] : vector<1x4xf16> to vector<4x1xf16>
// CHECK: vector.transfer_write %{{.*}}[%[[LANEY]], %[[IDS]]#3]
@@ -501,7 +502,7 @@
// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[I0]], %[[LIN_ID0]]] {{.*}} permutation_map = #[[$MAP1]]
// CHECK: %[[LIN_ID1:.+]] = affine.apply #[[$MAP2]]()[%[[I0]]]
// CHECK: vector.extract %{{.*}}[1, 0, 0, 0]
-// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[LIN_ID1]], %3] {{.*}} permutation_map = #[[$MAP1]]
+// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[LIN_ID1]], %[[LIN_ID0]]] {{.*}} permutation_map = #[[$MAP1]]
// CHECK: %[[LIN_ID2:.+]] = affine.apply #[[$MAP3]]()[%[[IDS]]#2, %[[I1]]]
// CHECK: vector.extract %{{.*}}[0, 1, 0, 0]
// CHECK: vector.transfer_write %{{.*}}[%c0, %c0, %[[I0]], %[[LIN_ID2]]] {{.*}} permutation_map = #[[$MAP1]]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
index b16675f..4fdb9dd 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
@@ -78,7 +78,7 @@
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[workgroup_id_x:.*]] = hal.interface.workgroup.id[0] : index
-// CHECK-DAG: %[[TIDX:.]] = gpu.thread_id x
+// CHECK-DAG: %[[TIDX:.+]] = gpu.thread_id x
// No allocation created for the per thread data.
// CHECK-NOT: memref.alloc()
@@ -153,7 +153,7 @@
// Fusion occurred, no barrier before the loop
// CHECK-NOT: gpu.barrier
// Local per-thread scf.for-based reduction.
-// CHECK: %[[TIDX:.]] = gpu.thread_id x
+// CHECK: %[[TIDX:.+]] = gpu.thread_id x
// CHECK: %[[v:.*]] = scf.for {{.*}} -> (vector<1xf32>)
// CHECK: vector.transfer_read {{.*}} vector<1xf32>
// CHECK: arith.addf{{.*}} : vector<1xf32>
@@ -216,7 +216,7 @@
// Fusion occurred, no barrier before the loop
// CHECK-NOT: gpu.barrier
// Local per-thread scf.for-based reduction.
-// CHECK: %[[TIDX:.]] = gpu.thread_id x
+// CHECK: %[[TIDX:.+]] = gpu.thread_id x
// CHECK: %[[TIDX_TIMES_4:.]] = affine.apply{{.*}}[%[[TIDX]]]
// CHECK: scf.for {{.*}} -> (vector<1xf32>) {
// CHECK: vector.transfer_read {{.*}} vector<4xf32>
@@ -366,7 +366,7 @@
// CHECK-DAG: %[[ALLOC0:.+]] = memref.alloc() {alignment = 64 : i64} : memref<1xi8, #gpu.address_space<workgroup>>
// Local per-thread scf.for-based reduction.
-// CHECK: %[[TIDX:.]] = gpu.thread_id x
+// CHECK: %[[TIDX:.+]] = gpu.thread_id x
// CHECK: scf.for {{.*}} -> (vector<1xi8>)
// CHECK: vector.transfer_read {{.*}} vector<1xi8>
// CHECK: arith.addi{{.*}} : vector<1xi8>
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
index d990f1c..3b92212 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
@@ -395,10 +395,25 @@
return false;
}
- // Do not fuse producer generic op if it has more than one user.
+ // Do not fuse producer generic op if it has more than one user
+ // or any reduction iterators.
if (auto producerGenericOp =
dyn_cast<linalg::GenericOp>(producer)) {
- return producerGenericOp->hasOneUse();
+ return producerGenericOp->hasOneUse() &&
+ llvm::all_of(producerGenericOp.getIteratorTypesArray(),
+ linalg::isParallelIterator);
+ }
+
+ // Do not fuse with any producer linalg named ops for now.
+ if (isa<linalg::LinalgOp>(producer)) {
+ return false;
+ }
+
+ // Do not fuse with consumer linalg named ops or reductions.
+ if (auto consumerLinalgOp = dyn_cast<linalg::LinalgOp>(consumer)) {
+ return isa<linalg::GenericOp>(consumerLinalgOp) &&
+ llvm::all_of(consumerLinalgOp.getIteratorTypesArray(),
+ linalg::isParallelIterator);
}
// Fuse in all other cases.
return true;
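For context, the checks above run inside the fusion control callback that decides, per fused operand, whether elementwise fusion may proceed; returning false vetoes the fusion of that producer/consumer pair. A minimal, hypothetical sketch of how such a control function plugs into MLIR's elementwise fusion patterns (function names and wiring are illustrative, not IREE's exact code, and the mlir namespace is assumed):

    // Illustrative sketch only: a fusion control callback with the same shape
    // as the one modified above. MLIR's ControlFusionFn receives the operand
    // along which producer and consumer would be fused.
    static bool controlElementwiseFusion(OpOperand *fusedOperand) {
      Operation *producer = fusedOperand->get().getDefiningOp();
      Operation *consumer = fusedOperand->getOwner();
      if (!producer || !consumer)
        return false;
      // ... producer/consumer checks like the ones in the diff above ...
      return true;
    }

    static void addFusionPatterns(RewritePatternSet &patterns) {
      // Register the upstream elementwise fusion patterns, gated by the
      // control callback above.
      linalg::populateElementwiseOpsFusionPatterns(patterns,
                                                   controlElementwiseFusion);
    }

Keeping the policy in the callback means the stricter rules here do not require touching the upstream fusion patterns themselves.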
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index 828d9cc..e8ee911 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -56,6 +56,7 @@
/// (`pdlValues[1]`).
/// TODO: PDL needs user-defined "questions".
static LogicalResult nestedInFunc(PatternRewriter &rewriter,
+ PDLResultList &pdlResults,
ArrayRef<PDLValue> pdlValues) {
assert(pdlValues.size() == 2 && "expected 2 PDL values");
Operation *operation = pdlValues[0].cast<Operation *>();
@@ -167,6 +168,7 @@
/// then the bodies would be equivalent (really isomorphic).
/// 3. other cases TBD (e.g. vector.generic when available).
static LogicalResult isEquivalentToOp(PatternRewriter &rewriter,
+ PDLResultList &pdlResults,
ArrayRef<PDLValue> pdlValues) {
assert(pdlValues.size() == 2 && "expected 2 PDL values");
Operation *operation = pdlValues[0].cast<Operation *>();
@@ -207,6 +209,7 @@
/// Note: 0 is the convention to express "do not tile", it is considered to
/// divide everything.
static LogicalResult isDimMultipleOf(PatternRewriter &rewriter,
+ PDLResultList &pdlResults,
ArrayRef<PDLValue> pdlValues) {
assert(pdlValues.size() == 2 && "expected 2 PDL values");
ValueRange operands = pdlValues[0].cast<ValueRange>();
@@ -250,6 +253,7 @@
/// Succeed if `value`[`operand_number`] is a ranked type whose `dim` is
/// dynamic.
static LogicalResult isDimStatic(PatternRewriter &rewriter,
+ PDLResultList &pdlResults,
ArrayRef<PDLValue> pdlValues) {
assert(pdlValues.size() == 2 && "expected 2 PDL values");
ValueRange operands = pdlValues[0].cast<ValueRange>();
@@ -284,6 +288,7 @@
/// Succeed if `value`[`operand_number`] is a ranked type whose `dim` is
/// dynamic.
static LogicalResult isDimDynamic(PatternRewriter &rewriter,
+ PDLResultList &pdlResults,
ArrayRef<PDLValue> pdlValues) {
assert(pdlValues.size() == 2 && "expected 2 PDL values");
ValueRange operands = pdlValues[0].cast<ValueRange>();
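These mechanical parameter additions track the PDL native constraint callback type, which after this bump appears to thread a PDLResultList through the call so constraints can also produce results; the helpers here simply accept and ignore it. A minimal, hypothetical sketch of a constraint with the extended signature and its registration (names and body are illustrative, the mlir namespace is assumed):

    // Illustrative sketch only: a native PDL constraint using the extended
    // signature. Constraints that only succeed or fail can leave pdlResults
    // untouched.
    static LogicalResult nestedInAnyFunc(PatternRewriter &rewriter,
                                         PDLResultList &pdlResults,
                                         ArrayRef<PDLValue> pdlValues) {
      assert(pdlValues.size() == 1 && "expected 1 PDL value");
      Operation *op = pdlValues[0].cast<Operation *>();
      return success(op->getParentOfType<func::FuncOp>() != nullptr);
    }

    static void registerHooks(PDLPatternModule &pdlPatterns) {
      // registerConstraintFunction binds the constraint under a name that PDL
      // patterns can refer to.
      pdlPatterns.registerConstraintFunction("nestedInAnyFunc", nestedInAnyFunc);
    }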
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 4df364b..c54064d 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 4df364bc93af49ae413ec1ae8328f34ac70730c4
+Subproject commit c54064de80e93494d1d44550b56ce8f2f3cf9c4b