blob: d29bcc4651eeb656af9c44bd1e8dad68ccf7e75d [file]
// Copyright 2023 The IREE Authors //
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/compiler/GlobalOptimization/Passes.h"
#include "iree/compiler/Dialect/Flow/IR/FlowDialect.h"
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
#include "iree/compiler/Dialect/Util/Transforms/Passes.h"
#include "iree/compiler/DispatchCreation/Passes.h"
#include "iree/compiler/Modules/IO/Parameters/Transforms/Passes.h"
#include "iree/compiler/Utils/PassUtils.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Transforms/Passes.h"
namespace mlir::iree_compiler::GlobalOptimization {
// Shorthand for a MultiOpNest instantiated over IREE::Util::InitializerOp and
// IREE::Util::FuncOp. The pipeline below constructs it from a pass manager and
// chains addPass()/addPredicatedPass() calls on it.
// NOTE(review): the exact nesting semantics live in MultiOpNest, presumably
// declared in iree/compiler/Utils/PassUtils.h — confirm there.
using FunctionLikeNest =
MultiOpNest<IREE::Util::InitializerOp, IREE::Util::FuncOp>;
// Off by default. When set, the global optimization pipeline adds
// createFuseDequantizationMatmulPass.
static llvm::cl::opt<bool> clEnableQuantizedMatmulReassociation(
    "iree-global-opt-enable-quantized-matmul-reassociation",
    llvm::cl::desc("Enables reassociation of quantized matmul ops "
                   "(experimental)."),
    llvm::cl::init(false));
// Off by default. When set, the global optimization pipeline adds
// createFuseSiluHorizontalMatmulPass.
static llvm::cl::opt<bool> clEnableFuseSiluHorizontalMatmul(
    "iree-global-opt-enable-fuse-silu-horizontal-matmul",
    llvm::cl::desc("Enables fusing specifically structured matmuls "
                   "(experimental)."),
    llvm::cl::init(false));
// On by default. Gates createPropagateLinalgTransposePass in the global
// optimization pipeline.
static llvm::cl::opt<bool> clEnableTransposePropagation(
    "iree-global-opt-propagate-transposes",
    llvm::cl::desc("Enables propagation of transpose ops to improve fusion "
                   "chances."),
    llvm::cl::init(true));
// TODO(hanchung): Remove this flag. Early materialization should not be the
// default because it does not work for heterogeneous computing; this is not
// the right layer for handling such information.
//
// On by default. Gates createMaterializeHomogeneousEncodingsPass inside the
// data-tiling section of the global optimization pipeline.
static llvm::cl::opt<bool> clEnableEarlyMaterialization(
    "iree-global-opt-enable-early-materialization",
    llvm::cl::desc("Enables early materialization on encodings. "
                   "Note, this flag should be false eventually. "
                   "This does not work for heterogeneous computing."),
    llvm::cl::init(true));
// Strategy handed to createDemoteContractionInputsToBF16Pass. Accepts "all",
// "conv", "matmul" or "none"; the default is DemotionOption::None.
static llvm::cl::opt<DemotionOption> clDemoteContractionInputsToBF16Strategy(
    "iree-global-opt-enable-demote-contraction-inputs-to-bf16",
    llvm::cl::desc(
        "Demotes inputs (LHS, RHS) of contraction ops to BF16. Selects types "
        "of contraction ops to demote."),
    llvm::cl::values(
        clEnumValN(DemotionOption::All, "all",
                   "Demote all contraction ops."),
        clEnumValN(DemotionOption::Conv, "conv",
                   "Only demote convolution ops."),
        clEnumValN(DemotionOption::Matmul, "matmul",
                   "Only demote matmul ops."),
        clEnumValN(DemotionOption::None, "none",
                   "Demote no contraction ops.")),
    llvm::cl::init(DemotionOption::None));
// Value used to initialize DispatchCreation::SetEncodingPassOptions when data
// tiling is enabled. Defaults to 32.
static llvm::cl::opt<int> clPadFactor(
    "iree-global-opt-pad-factor",
    llvm::cl::desc(
        "provides padding size hints that will be attached to encodings."),
    llvm::cl::init(32));
// Appends the hoist-into-globals pass to `passManager`.
//
// The pass is configured with:
//   - a dependent-dialect callback that inserts IREE::Flow::FlowDialect into
//     the registry it is given, and
//   - the size-increase threshold read from
//     `transformOptions.options.constExprMaxSizeIncreaseThreshold`.
void buildGlobalOptExprHoistingPassPipeline(
    OpPassManager &passManager, const TransformOptions &transformOptions) {
  IREE::Util::ExprHoistingOptions hoistingOptions;
  hoistingOptions.registerDependentDialectsFn =
      [](DialectRegistry &dialectRegistry) {
        dialectRegistry.insert<IREE::Flow::FlowDialect>();
      };
  hoistingOptions.maxSizeIncreaseThreshold =
      transformOptions.options.constExprMaxSizeIncreaseThreshold;

  passManager.addPass(
      IREE::Util::createHoistIntoGlobalsPass(hoistingOptions));
}
// Appends the global optimization pipeline to `mainPassManager`, configured by
// `transformOptions` and the command-line flags defined in this file.
//
// Passes are added in the following order (the order is significant):
//   1. Parameter import, only when parameterImportPaths is non-empty.
//   2. Preprocessing of function-like ops (optional debug-op stripping,
//      integer arithmetic optimization, quantized conv/matmul lowering,
//      canonicalization, ...), then erasing unused linalg operands.
//   3. Tensor shape expansion, elementwise-to-linalg conversion, special-op
//      raising, concat decomposition, named-op generalization and unit-extent
//      dim folding.
//   4. Flag-gated fusions, BF16 demotion and transpose propagation, each
//      followed by canonicalization + CSE.
//   5. Data tiling (set encoding and follow-ups), only when
//      options.dataTiling is set.
//   6. Named-op generalization, loop-invariant code motion, util.global
//      simplification, global folding and IPO.
//   7. Const-expr hoisting, the caller-provided const-eval pipeline, and
//      numeric precision reduction, each only when enabled.
//   8. Final canonicalization/CSE/raising, then parameter export and splat
//      archive generation when the corresponding paths are set.
void buildGlobalOptimizationPassPipeline(
OpPassManager &mainPassManager, const TransformOptions &transformOptions) {
// Import parameters before any global optimization passes so that the inlined
// parameters are available for folding.
if (!transformOptions.options.parameterImportPaths.empty()) {
IREE::IO::Parameters::ImportParametersPassOptions importParametersOptions;
importParametersOptions.scopePaths.assign(
transformOptions.options.parameterImportPaths.begin(),
transformOptions.options.parameterImportPaths.end());
importParametersOptions.keys.assign(
transformOptions.options.parameterImportKeys.begin(),
transformOptions.options.parameterImportKeys.end());
importParametersOptions.maximumSize =
transformOptions.options.parameterImportMaximumSize;
mainPassManager.addPass(IREE::IO::Parameters::createImportParametersPass(
importParametersOptions));
}
// Preprocessing passes to get the program into a canonical state.
FunctionLikeNest(mainPassManager)
// Debug ops are only stripped when the stripAssertions option is set.
.addPredicatedPass(transformOptions.options.stripAssertions,
IREE::Util::createStripDebugOpsPass)
.addPass(IREE::Util::createOptimizeIntArithmeticPass)
.addPass(createLinalgQuantizedConvToConvPass)
.addPass(createLinalgQuantizedMatmulToMatmulPass)
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(createRemoveZeroExtentTensorsPass)
.addPass(createDetachElementwiseFromNamedOpsPass)
.addPass(mlir::createLinalgNamedOpConversionPass);
mainPassManager.addPass(createEraseUnusedLinalgOperandsPass());
// Expand tensor shapes into SSA values and optimize the whole program.
// The more we are able to equate shape dimensions at this level the
// better our fusions will be.
mainPassManager.addPass(createExpandTensorShapesPass());
FunctionLikeNest(mainPassManager)
// Preprocess the input to a form more amenable for fusion
// - Convert all elementwise ops to Linalg
// - Remove unit-extent dimensions.
.addPass(mlir::createConvertElementwiseToLinalgPass)
// RaiseSpecialOps, by virtue of implementing various peephole
// optimizations, is sensitive to surrounding IR structure. Thus we run
// this pass both before unit dim folding + consteval, as well as after.
.addPass(createRaiseSpecialOpsPass)
// We decompose and transpose concatenations immediately before folding
// unit extent dims because this allows decoupling unit dims in the
// concatenation from the transposes that are introduced.
.addPass([&]() {
return createDecomposeConcatPass(
transformOptions.options.outerDimConcat);
})
// We generalize certain named ops immediately before folding unit extent
// dims as the unit dim folding pass updates indexing maps and is better
// at working with generics. By this point we have already done any
// specialized raising and the op names are no longer useful.
.addPass(createGeneralizeLinalgNamedOpsPass);
mainPassManager.addPass(DispatchCreation::createFoldUnitExtentDimsPass());
// Flag-gated fusions plus contraction input demotion. The demotion pass is
// always added; the strategy flag (default: none) is passed to it.
FunctionLikeNest(mainPassManager)
.addPredicatedPass(clEnableFuseSiluHorizontalMatmul,
createFuseSiluHorizontalMatmulPass)
.addPass([&]() {
return createDemoteContractionInputsToBF16Pass(
clDemoteContractionInputsToBF16Strategy);
})
.addPredicatedPass(clEnableQuantizedMatmulReassociation,
createFuseDequantizationMatmulPass)
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
// Propagate transposes immediately before set encoding/data tiling
// because transpose propagation cannot take an opinion on the preferred
// layout of various operations. This simplifies local propagation
// decisions as SetEncoding is expected to pick the ideal layout for
// that operation anyway, and this way we only need to make such a
// decision once.
.addPredicatedPass(
clEnableTransposePropagation,
[&]() {
return createPropagateLinalgTransposePass(
transformOptions.options.aggressiveTransposePropagation);
})
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(mlir::createCSEPass);
// Enable data tiling after they are in a canonical form.
if (transformOptions.options.dataTiling) {
// The pad factor flag is forwarded to the set-encoding pass options.
FunctionLikeNest(mainPassManager).addPass([&]() {
return DispatchCreation::createSetEncodingPass(
DispatchCreation::SetEncodingPassOptions{clPadFactor});
});
// TODO(hanchung): Make data-tiling passes be FunctionOpInterface pass, so
// we can use `FunctionLikeNest` here.
if (clEnableEarlyMaterialization) {
mainPassManager.addPass(createMaterializeHomogeneousEncodingsPass());
}
mainPassManager.addPass(IREE::Flow::createCanonicalizerPass());
mainPassManager.addPass(createCSEPass());
mainPassManager.addPass(createSimplifyPackUnpackPass());
FunctionLikeNest(mainPassManager).addPass(createDataLayoutPropagationPass);
}
// Generalize transposes and any other remaining named linalg ops that can
// now be represented as generics.
FunctionLikeNest(mainPassManager).addPass(createGeneralizeLinalgNamedOpsPass);
// Hoist loop invariants (e.g. from scf loops) with zero-trip-check.
FunctionLikeNest(mainPassManager)
.addPass(createGlobalLoopInvariantCodeMotionPass)
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
// Simplify util.global accesses early on; this can help with dispatch
// region formation as redundant store-loads are removed.
.addPass(IREE::Util::createSimplifyGlobalAccessesPass)
// Aggressive cleanup.
.addPass(IREE::Util::createApplyPatternsPass);
// Module level cleanup and canonicalization of util.global (and other
// util ops).
mainPassManager.addPass(IREE::Util::createFoldGlobalsPass());
mainPassManager.addPass(IREE::Util::createIPOPass());
FunctionLikeNest(mainPassManager)
.addPass(IREE::Util::createOptimizeIntArithmeticPass)
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(createCSEPass);
// Add the const-expr hoisting sub-pipeline when the option is enabled.
if (transformOptions.options.constExprHoisting) {
buildGlobalOptExprHoistingPassPipeline(mainPassManager, transformOptions);
}
// Let the caller-supplied callback, if any, append its const-eval passes.
if (transformOptions.buildConstEvalPassPipeline) {
transformOptions.buildConstEvalPassPipeline(mainPassManager);
}
// Numeric precision reduction: infer, optimize, then cleanup, in that order.
if (transformOptions.options.numericPrecisionReduction) {
mainPassManager.addPass(createInferNumericNarrowingPass());
mainPassManager.addPass(createOptimizeNumericsPass());
mainPassManager.addPass(createCleanupNumericNarrowingPass());
}
FunctionLikeNest(mainPassManager)
.addPass(IREE::Flow::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
// After running const-eval to a fixed point and folding unit extent dims,
// try any new raising opportunities.
.addPass(createRaiseSpecialOpsPass);
// Export after const-eval. If the user wants to keep the input constants
// as is in the final parameter archive, they will probably want to disable
// const-eval, or could run this pass as preprocessing. There might be a
// configuration in the future where users want to limit const-eval to smaller
// constants that aren't exported and skip it for larger parameters, but this
// is a sensible place for the common case of wanting const-eval in the final
// artifact + archive.
if (!transformOptions.options.parameterExportPath.empty()) {
IREE::IO::Parameters::ExportParametersPassOptions exportParametersOptions;
exportParametersOptions.scopePath =
transformOptions.options.parameterExportPath;
exportParametersOptions.minimumSize =
transformOptions.options.parameterExportMinimumSize;
mainPassManager.addPass(IREE::IO::Parameters::createExportParametersPass(
exportParametersOptions));
}
// Add the splat parameter archive generation pass when a file path is set.
if (!transformOptions.options.parameterSplatExportFile.empty()) {
IREE::IO::Parameters::GenerateSplatParameterArchivePassOptions
generateSplatOptions;
generateSplatOptions.filePath =
transformOptions.options.parameterSplatExportFile;
mainPassManager.addPass(
IREE::IO::Parameters::createGenerateSplatParameterArchivePass(
generateSplatOptions));
}
}
// Includes the GEN_PASS_REGISTRATION section of Passes.h.inc inside an
// anonymous namespace, so whatever it defines is local to this translation
// unit.
// NOTE(review): this is presumably where the registerPasses() function called
// below comes from — confirm against the generated Passes.h.inc.
namespace {
#define GEN_PASS_REGISTRATION
#include "iree/compiler/GlobalOptimization/Passes.h.inc" // IWYU pragma: export
} // namespace
// Registers the passes of this directory and two named pass pipelines:
//   - "iree-global-optimization-transformation-pipeline", which forwards to
//     buildGlobalOptimizationPassPipeline, and
//   - "iree-global-optimization-hoist-constant-expressions", which forwards to
//     buildGlobalOptExprHoistingPassPipeline.
void registerGlobalOptimizationPipeline() {
  registerPasses();

  // Full global optimization pipeline.
  PassPipelineRegistration<TransformOptions> fullPipelineRegistration(
      "iree-global-optimization-transformation-pipeline",
      "Runs the IREE global optimization transformation pipeline",
      [](OpPassManager &pm, const TransformOptions &options) {
        buildGlobalOptimizationPassPipeline(pm, options);
      });

  // Constant-expression hoisting only.
  PassPipelineRegistration<TransformOptions> exprHoistingRegistration(
      "iree-global-optimization-hoist-constant-expressions",
      "Hoists constant expressions with the preferred storage types "
      "for global optimization",
      [](OpPassManager &pm, const TransformOptions &options) {
        buildGlobalOptExprHoistingPassPipeline(pm, options);
      });
}
} // namespace mlir::iree_compiler::GlobalOptimization