blob: e59f394efadecd5d7bfd742eef4d3ca5732b4511 [file] [log] [blame]
// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
#include <memory>
#include "iree/compiler/Dialect/Util/Transforms/Passes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Transforms/Passes.h"
// TODO(ravishankarm): Change to a pipeline option.
static llvm::cl::opt<bool> clExportBenchmarkFuncs(
"iree-flow-export-benchmark-funcs",
llvm::cl::desc(
"Exports one function per original module entry point and "
"unique flow.executable that dispatches with dummy arguments."),
llvm::cl::init(false));
// TODO(ravishankarm): Change to a pipeline option.
static llvm::cl::opt<bool> clTraceDispatchTensors(
"iree-flow-trace-dispatch-tensors2",
llvm::cl::desc(
"Trace runtime input/output tensors for each dispatch function."),
llvm::cl::init(false));
static llvm::cl::opt<bool> clDemoteF32ToF16(
"iree-flow-demote-f32-to-f16",
llvm::cl::desc("Convert all f32 ops and values into f16 counterparts "
"unconditionally before main flow conversions"),
llvm::cl::init(false));
static llvm::cl::opt<bool> clEnableConvToImg2Col(
"iree-flow-enable-conv-img2col-transform",
llvm::cl::desc("Enable converting convolution ops to img2col form."),
llvm::cl::init(false));
static llvm::cl::opt<bool> clEnablePaddingLinalgOps(
"iree-flow-enable-padding-linalg-ops",
llvm::cl::desc("Enable padding linalg ops to an integer multiple of "
"flow-padding-size"),
llvm::cl::init(false));
static llvm::cl::opt<int> clLinalgOpsPaddingSize(
"iree-flow-linalg-ops-padding-size",
llvm::cl::desc("Enable padding linalg ops to an integer multiple of "
"flow-padding-size"),
llvm::cl::init(4));
// TODO(#1159): enable by default or remove this option once it works on
// a broader set of programs
static llvm::cl::opt<bool> clEnableLinalgDetensorize(
"iree-flow-enable-linalg-detensorize",
llvm::cl::desc("Enable detensorizing linalg ops to operate on primitives"),
llvm::cl::init(false));
namespace mlir {
namespace iree_compiler {
namespace IREE {
namespace Flow {
void buildFlowTransformPassPipeline(OpPassManager &passManager,
const TransformOptions &transformOptions) {
// Special case peephole optimizations.
{
passManager.addNestedPass<FuncOp>(createConvertConv2D1x1ToMatmulPass());
if (clEnableConvToImg2Col) {
passManager.addNestedPass<FuncOp>(createConvertConv2DToImg2ColPass());
}
// Pad linalg op
if (clEnablePaddingLinalgOps) {
passManager.addNestedPass<FuncOp>(
createPadLinalgOpsToIntegerMultiplePass(clLinalgOpsPaddingSize));
}
}
passManager.addNestedPass<mlir::FuncOp>(createVerifyInputLegalityPass());
// Simplify util.global accesses early on; this can help with dispatch
// region formation as redundant store-loads are removed.
passManager.addNestedPass<IREE::Util::InitializerOp>(
IREE::Util::createSimplifyGlobalAccessesPass());
passManager.addNestedPass<mlir::FuncOp>(
IREE::Util::createSimplifyGlobalAccessesPass());
// Cleanup and canonicalization of util.global (and other util ops).
passManager.addPass(IREE::Util::createApplyPatternsPass());
passManager.addPass(IREE::Util::createFoldGlobalsPass());
// Perform cleanup after variable simplification as more canonicalizers may be
// able to kick in.
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
passManager.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
passManager.addPass(createPadTensorToSubTensorInsertPass());
// Elementwise, fusion, tiling and distribution.
passManager.addNestedPass<mlir::FuncOp>(
mlir::createConvertElementwiseToLinalgPass());
passManager.addNestedPass<mlir::FuncOp>(
mlir::createLinalgFoldUnitExtentDimsPass());
passManager.addNestedPass<mlir::FuncOp>(createInterchangeGenericOpsPass());
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
passManager.addPass(memref::createResolveShapedTypeResultDimsPass());
passManager.addNestedPass<mlir::FuncOp>(createFusionOfTensorOpsPass());
passManager.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
if (clEnableLinalgDetensorize) {
passManager.addNestedPass<mlir::FuncOp>(
mlir::createLinalgDetensorizePass());
}
passManager.addNestedPass<mlir::FuncOp>(
createConvertToFlowBeforeDispatchFormation());
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
passManager.addNestedPass<mlir::FuncOp>(createDispatchLinalgOnTensorsPass());
passManager.addPass(memref::createResolveShapedTypeResultDimsPass());
passManager.addNestedPass<mlir::FuncOp>(
createConvertToFlowAfterDispatchFormation());
// NOTE: required because the current dispatch-linalg-on-tensors pass
// creates a lot of dead IR that needs to be cleaned up.
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
// Outline the dispatch regions into their own functions wrapped in
// executables.
passManager.addPass(createOutlineDispatchRegionsPass());
// Cleanup identity ops that clutter up the IR and canonicalize.
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
// Deduplicate executables created from dispatch regions.
// Note: this only deduplicates equivalent executables. We could in addition
// generalize executables to prune further (e.g. by promoting a dimension to
// an argument if two executables differ only in that one dimension).
passManager.addPass(createDeduplicateExecutablesPass());
// Create one function per remaining flow.executable that can be used with
// iree-benchmark-module to benchmark each dispatch individually, as well as
// exporting all original model entry points.
if (clExportBenchmarkFuncs) {
passManager.addPass(IREE::Flow::createExportBenchmarkFuncsPass());
}
// Inject tracing that logs both input and output tensors from all dispatches.
// We do this after deduping so that the executable names match later stages.
if (clTraceDispatchTensors) {
passManager.addNestedPass<mlir::FuncOp>(
IREE::Flow::createInjectDispatchTracingPass());
}
// Cleanup the IR after we are done.
passManager.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
passManager.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
passManager.addNestedPass<IREE::Flow::ExecutableOp>(
mlir::createCanonicalizerPass());
passManager.addNestedPass<IREE::Flow::ExecutableOp>(mlir::createCSEPass());
// Symbol DCE any remaining variables/functions that are now no longer
// required.
passManager.addPass(mlir::createSymbolDCEPass());
}
void registerFlowTransformPassPipeline() {
PassPipelineRegistration<TransformOptions> transformPassPipeline(
"iree-flow-transformation-pipeline",
"Runs the full IREE flow dialect transformation pipeline",
[](OpPassManager &passManager, const TransformOptions &transformOptions) {
buildFlowTransformPassPipeline(passManager, transformOptions);
});
}
namespace {
#define GEN_PASS_REGISTRATION
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc" // IWYU pragma: export
} // namespace
void registerFlowPasses() {
// Generated.
registerPasses();
// Pipelines.
registerFlowTransformPassPipeline();
}
} // namespace Flow
} // namespace IREE
} // namespace iree_compiler
} // namespace mlir