// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
// This is a copy of the matmul strategy infrastructure existing in mlir_edge.
// This version will be removed once this gets upstreamed to common mlir.
// Please limit changes in this code to minor ones, or make sure the same
// changes are applied in mlir_edge as well.
#include "iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/Value.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
using namespace mlir; // NOLINT
using namespace mlir::linalg; // NOLINT
#define DEBUG_TYPE "matmul-codegen-strategy"
//===----------------------------------------------------------------------===//
// TODO: Clean up and upstream these into core. Please ignore for now!
//===----------------------------------------------------------------------===//
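/// Hoists linalg.fill and linalg.copy ops whose operands are all defined
/// outside their enclosing scf.for loop: fills (and copies reading from block
/// arguments) move above the loop, while copies writing out of locally
/// allocated buffers move below it. Iterates until a fixed point is reached.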
static void hoistRedundantCopies(FuncOp func) {
bool changed = true;
while (changed) {
changed = false;
func.walk([&](linalg::FillOp op) {
auto loop = op.getParentOfType<scf::ForOp>();
if (!loop) return;
for (auto operand : op.getOperands())
if (!loop.isDefinedOutsideOfLoop(operand)) return;
// All operands are defined outside the loop; hoist the fill above it.
op.getOperation()->moveBefore(loop);
changed = true;
});
func.walk([&](linalg::CopyOp op) {
auto loop = op.getParentOfType<scf::ForOp>();
if (!loop) return;
for (auto operand : op.getOperands())
if (!loop.isDefinedOutsideOfLoop(operand)) return;
Value sourceView = op.getInput(0);
while (auto subViewOp = sourceView.getDefiningOp<SubViewOp>())
sourceView = subViewOp.getViewSource();
// If the source traces back to a block argument, the copy reads data defined
// outside the loop and can be hoisted above it; otherwise the copy writes out
// of a locally allocated buffer and is sunk below the loop.
if (sourceView.isa<BlockArgument>()) {
op.getOperation()->moveBefore(loop);
} else {
assert(sourceView.getDefiningOp<ViewOp>() ||
sourceView.getDefiningOp<AllocOp>() ||
sourceView.getDefiningOp<AllocaOp>());
op.getOperation()->moveAfter(loop);
}
changed = true;
});
}
}
/// Substitute the induction variable of an scf.for loop
/// `scf.for %iv = %lb to %ub step %step` by an AffineExpr expressing
/// `%lb + %step * new_dim`, where:
///   1. the AffineExpr for %lb is either an AffineConstantExpr or an
///      AffineDimExpr depending on whether the value is constant or not.
///   2. the AffineExpr for %step is either an AffineConstantExpr or an
///      AffineSymbolExpr depending on whether the value is constant or not.
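///
/// The upper bound is appended as an additional expression, and the loop
/// induction variable is recorded as a trailing dim operand.
///
/// For example (a sketch with constant bounds), given
/// `scf.for %iv = %c0 to %c16 step %c4` this appends the expressions
/// `0 + 4 * d0` (the substituted induction variable) and `16` (the upper
/// bound), with `%iv` recorded as the dim operand binding `d0`.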
static void substitute(scf::ForOp forOp, SmallVectorImpl<AffineExpr> &exprs,
SmallVectorImpl<Value> &dims,
SmallVectorImpl<Value> &symbols) {
MLIRContext *ctx = forOp.getContext();
auto lbConstant = forOp.lowerBound().getDefiningOp<ConstantIndexOp>();
AffineExpr lb = lbConstant ? getAffineConstantExpr(lbConstant.getValue(), ctx)
: getAffineDimExpr(dims.size(), ctx);
auto stepConstant = forOp.step().getDefiningOp<ConstantIndexOp>();
AffineExpr step = stepConstant
? getAffineConstantExpr(stepConstant.getValue(), ctx)
: getAffineSymbolExpr(symbols.size(), ctx);
if (!lbConstant) dims.push_back(forOp.lowerBound());
if (!stepConstant) symbols.push_back(forOp.step());
exprs.push_back(lb + step * getAffineDimExpr(dims.size(), ctx));
auto ubConstant = forOp.upperBound().getDefiningOp<ConstantIndexOp>();
AffineExpr ub = ubConstant ? getAffineConstantExpr(ubConstant.getValue(), ctx)
: getAffineDimExpr(dims.size(), ctx);
if (!ubConstant) dims.push_back(forOp.upperBound());
exprs.push_back(ub);
dims.push_back(forOp.getInductionVar());
}
/// Substitute dimensions coming from enclosing scf.for loops or from other
/// AffineMinOps. Return false if the op has dimension operands that cannot be
/// substituted.
static bool substitute(AffineMinOp minOp, SmallVectorImpl<AffineExpr> &exprs,
SmallVectorImpl<Value> &dims,
SmallVectorImpl<Value> &symbols) {
if (minOp.getDimOperands().empty()) return false;
for (Value v : minOp.getDimOperands()) {
if (auto forOp = scf::getForInductionVarOwner(v)) {
substitute(forOp, exprs, dims, symbols);
continue;
}
if (auto parentMinOp = v.getDefiningOp<AffineMinOp>()) {
substitute(parentMinOp, exprs, dims, symbols);
continue;
}
// If the dimension could not be substituted, give up and use the original map.
return false;
}
return true;
}
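/// Folds an affine_min op to its smallest constant result when the loop
/// structure proves that constant is the actual minimum. The heuristic:
/// substitute the dim operands using the enclosing scf.for bounds, then check
/// that every substituted expression simplifies to 0 modulo the constant;
/// since loop bounds are positive multiples of it, each candidate value is
/// then at least the constant. For example, in
///
///   scf.for %iv = %c0 to %c16 step %c4 {
///     %0 = affine.min affine_map<(d0) -> (4, -d0 + 16)>(%iv)
///
/// %0 always evaluates to 4 because %iv and the bound 16 are both multiples
/// of 4, so the op can be replaced by a constant 4.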
LogicalResult AffineMinCanonicalizationPattern::matchAndRewrite(
AffineMinOp minOp, PatternRewriter &rewriter) const {
LLVM_DEBUG(llvm::dbgs() << "\nCanonicalize AffineMin: "
<< *minOp.getOperation() << "\n");
int64_t min = std::numeric_limits<int64_t>::max();
for (auto e : minOp.map().getResults())
if (auto cstExpr = e.dyn_cast<AffineConstantExpr>())
min = std::min(min, cstExpr.getValue());
if (min == std::numeric_limits<int64_t>::max()) return failure();
MLIRContext *ctx = minOp.getContext();
AffineMap map;
SmallVector<Value, 4> operands;
SmallVector<AffineExpr, 4> exprs;
SmallVector<Value, 4> dims, symbols;
if (substitute(minOp, exprs, dims, symbols)) {
operands = dims;
operands.append(symbols.begin(), symbols.end());
map = AffineMap::get(dims.size(), symbols.size(), exprs, ctx);
LLVM_DEBUG(llvm::dbgs() << "Substitution map: " << map << "\n");
} else {
map = minOp.getAffineMap();
operands = minOp.getDimOperands();
operands.append(minOp.getSymbolOperands().begin(),
minOp.getSymbolOperands().end());
}
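// Check that every result of the map simplifies to 0 modulo `min`; if so,
// each candidate value is a multiple of `min` and (the bounds being positive)
// the affine_min statically evaluates to `min`.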
SmallVector<AffineExpr, 4> modExprs;
for (unsigned idx = 0, e = map.getNumResults(); idx < e; ++idx)
modExprs.push_back(getAffineDimExpr(idx, ctx) % min);
map = AffineMap::get(map.getNumResults(), 0, modExprs, ctx).compose(map);
canonicalizeMapAndOperands(&map, &operands);
map = simplifyAffineMap(map);
LLVM_DEBUG(llvm::dbgs() << "Post mod: " << map << "\n";
llvm::interleaveComma(operands, llvm::dbgs()));
if (!llvm::all_of(map.getResults(), [](AffineExpr e) {
if (auto cst = e.dyn_cast<AffineConstantExpr>())
return cst.getValue() == 0;
return false;
}))
return failure();
rewriter.replaceOpWithNewOp<ConstantIndexOp>(minOp, min);
return success();
}
//===----------------------------------------------------------------------===//
// END TODO
//===----------------------------------------------------------------------===//
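/// Applies the configured transformation sequence to `func` in stages: stage 1
/// applies each transformation's rewrite patterns, chained through linalg
/// markers; stage 2 runs the tiling canonicalization patterns (including the
/// AffineMin patterns above); stage 3 promotes single-iteration loops. LICM,
/// the hoisting cleanups, and the optional lowering function run afterwards.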
void MatmulCodegenStrategy::transform(FuncOp func) const {
MLIRContext *context = func.getContext();
// Emplace patterns one at a time while also maintaining a simple chained
// state transition.
unsigned stepCount = 0;
SmallVector<FrozenRewritePatternList, 4> stage1Patterns;
auto zeroState = Identifier::get(std::to_string(stepCount), context);
auto currentState = zeroState;
for (auto &t : transformationSequence) {
auto nextState = Identifier::get(std::to_string(++stepCount), context);
auto marker = (currentState == zeroState)
? linalg::LinalgMarker({}, nextState)
: linalg::LinalgMarker(currentState, nextState);
stage1Patterns.emplace_back(t->buildRewritePatterns(context, marker));
currentState = nextState;
}
OwningRewritePatternList stage2Patterns =
linalg::getLinalgTilingCanonicalizationPatterns(context);
// Add extra patterns to canonicalize AffineMin in combination with scf loop
// operations after tiling.
stage2Patterns.insert<AffineMinCanonicalizationPattern,
AffineMinSCFCanonicalizationPattern>(context);
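// Stage 3: once canonicalization has exposed constant trip counts, replace
// loops that execute a single iteration by their body.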
auto stage3Transforms = [](Operation *op) {
promoteSingleIterationLoops(cast<FuncOp>(op));
return success();
};
linalg::applyStagedPatterns(func, stage1Patterns, std::move(stage2Patterns),
stage3Transforms);
auto postStageTransforms = [this](Operation *op) {
// Run LICM and hoisting patterns after all the stages, as we want unrolling
// to happen before moving transfer ops out of the loop.
if (hoistInvariantCode) {
PassManager pm(op->getContext());
pm.addPass(createLoopInvariantCodeMotionPass());
if (failed(pm.run(op->getParentOfType<ModuleOp>())))
llvm_unreachable("Unexpected failure in cleanup pass pipeline.");
hoistViewAllocOps(cast<FuncOp>(op));
hoistRedundantVectorTransfers(cast<FuncOp>(op));
hoistRedundantCopies(cast<FuncOp>(op));
}
};
postStageTransforms(func);
if (lowering != nullptr) lowering(func);
}
// Parametric lowering of vector.contract for the CPU target.
static void cpuLowering(
FuncOp func, const vector::VectorTransformsOptions &vectorTransformsOptions,
const VectorTransferToSCFOptions &vectorToSCFOptions) {
// Programmatically controlled lowering of vector.contract only.
MLIRContext *context = func.getContext();
OwningRewritePatternList vectorContractLoweringPatterns;
vectorContractLoweringPatterns
.insert<ContractionOpToOuterProductOpLowering,
ContractionOpToMatmulOpLowering, ContractionOpLowering>(
vectorTransformsOptions, context);
applyPatternsAndFoldGreedily(func, std::move(vectorContractLoweringPatterns));
// Programmatically controlled lowering of vector.transfer only.
OwningRewritePatternList vectorToLoopsPatterns;
populateVectorToSCFConversionPatterns(vectorToLoopsPatterns, context,
vectorToSCFOptions);
applyPatternsAndFoldGreedily(func, std::move(vectorToLoopsPatterns));
}
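/// Sets the lowering function to the parametric CPU lowering above, using the
/// vector transform and vector-to-SCF options configured on this strategy.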
MatmulCodegenStrategy &MatmulCodegenStrategy::setDefaultCPULowering() {
auto lowering = [this](FuncOp func) {
cpuLowering(func, vectorTransformsOptions, vectorToSCFOptions);
};
return setLoweringFunction(lowering);
}
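// Typical use of the strategy (a sketch; tile<> and vectorize<> are assumed to
// be the builder methods declared in MatmulCodegenStrategy.h):
//   MatmulCodegenStrategy strategy;
//   strategy
//       .tile<linalg::MatmulOp>(
//           linalg::LinalgTilingOptions().setTileSizes({8, 8, 8}))
//       .vectorize<linalg::MatmulOp>()
//       .setDefaultCPULowering();
//   strategy.transform(funcOp);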