Let FusionOfTensorOps apply patterns with no iteration limit. (#11336)
The way the patterns interact causes what is effectively folding to
count against the max iteration limit. The default of 10 is
insufficient when the input program has a lot of constants to fold.
In the future we could explore top-down traversal as an alternative.
Also added error diagnostics at each failure point so that when this pass fails we see a message on stderr.
Fixes #11333.
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
index de17c45..f446903 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
@@ -271,8 +271,12 @@
->getCanonicalizationPatterns(fusionPatterns);
memref::populateResolveRankedShapeTypeResultDimsPatterns(fusionPatterns);
+ GreedyRewriteConfig rewriteConfig;
+ rewriteConfig.maxIterations = GreedyRewriteConfig::kNoIterationLimit;
if (failed(applyPatternsAndFoldGreedily(funcOp->getRegions(),
- std::move(fusionPatterns)))) {
+ std::move(fusionPatterns),
+ rewriteConfig))) {
+ funcOp->emitError("failed to apply fusion patterns");
return signalPassFailure();
}
@@ -311,6 +315,7 @@
collapsingReshapePatterns);
if (failed(applyPatternsAndFoldGreedily(
funcOp->getRegions(), std::move(collapsingReshapePatterns)))) {
+ funcOp->emitError("failed to apply collapsing reshape patterns");
return signalPassFailure();
}
@@ -330,7 +335,10 @@
auto &dominanceInfo = getAnalysis<DominanceInfo>();
FailureOr<unsigned> numOfFusableCandidates =
fuseMultiUseProducers(funcOp, context, dominanceInfo);
- if (failed(numOfFusableCandidates)) return signalPassFailure();
+ if (failed(numOfFusableCandidates)) {
+ funcOp->emitError("failed to fuse multi-use producers");
+ return signalPassFailure();
+ }
if (numOfFusableCandidates.value() == 0) break;
}
}