// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "iree/compiler/Conversion/HLOToLinalg/Passes.h"
#include "iree/compiler/Conversion/Common/Attributes.h"
#include "iree/compiler/Conversion/Common/Passes.h"
#include "iree/compiler/Conversion/HLOToHLO/Passes.h"
#include "iree/compiler/Conversion/LLVMToLLVM/Passes.h"
#include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
#include "iree/compiler/Dialect/Shape/Transforms/Passes.h"
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"
namespace mlir {
namespace iree_compiler {
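// Command-line flags that gate experimental features of the Linalg-to-LLVM
// CPU code generation path.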
static llvm::cl::opt<bool> clEnableLLVMLinalgOnTensors(
"iree-codegen-llvm-experimental-linalg-on-tensors",
llvm::cl::desc("Enable the linalg on tensors experimental LLVM path"),
llvm::cl::init(false));
static llvm::cl::opt<bool> convImg2ColConversion(
"iree-codegen-linalg-to-llvm-conv-img2col-conversion",
llvm::cl::desc("Enable rewriting linalg.conv linalg.generic that does "
"img2col buffer packing + "
"linag.matmul"),
llvm::cl::init(false));
static llvm::cl::opt<bool> fastExpConversion(
"iree-codegen-linalg-to-llvm-fast-exp",
llvm::cl::desc("If true convert llvm.intr.exp into its range reduced "
"polynomial approximation."),
llvm::cl::init(false));
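// Populates |passManager| with passes that progressively lower Linalg ops
// through SCF and Standard down to the LLVM dialect.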
void addLinalgToLLVMPasses(OpPassManager &passManager) {
  // Distribute linalg ops among a 3-D grid of parallel threads. Tile each
  // workgroup's workload into thread-local memory, then vectorize the linalg
  // ops.
passManager.addPass(createLinalgTileAndDistributePass());
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
if (!clEnableLLVMLinalgOnTensors) {
nestedModulePM.addPass(createLegalizeNumWorkgroupsFnPass());
}
  // Linalg.ConvOp -> (img2col packing + matmul).
  // After the convolution is tiled and distributed among workgroups, it is
  // converted before the workgroup workload is vectorized.
if (convImg2ColConversion) {
nestedModulePM.addNestedPass<FuncOp>(
createConvImg2ColMatmulConversionPass());
}
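  // Tile and vectorize the linalg ops within each workgroup.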
nestedModulePM.addNestedPass<FuncOp>(
createLinalgTileAndVectorizeWorkgroupsPass());
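  // Choose the loop iteration order for linalg.conv before lowering to loops.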
nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
// Linalg -> SCF
nestedModulePM.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
// SCF -> STD
nestedModulePM.addNestedPass<FuncOp>(createLowerToCFGPass());
nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
// (HAL, IREE, Linalg, STD) -> LLVM
// OpPassManager& llvmPassManager = nestedModulePM.nest<ModuleOp>();
nestedModulePM.addPass(createConvertToLLVMPass());
nestedModulePM.addPass(createCanonicalizerPass());
nestedModulePM.addPass(createCSEPass());
  // Approximate llvm.intr.exp with a 4th-order polynomial over the range
  // [0, ln2].
if (fastExpConversion) {
nestedModulePM.addPass(createFastExpApproximationConversionPass());
}
}
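// Builds the full code generation pipeline for the LLVM target: HLO is
// lowered to Linalg (on tensors or on buffers, depending on the experimental
// flag) and then handed to the Linalg-to-LLVM passes above.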
void buildLLVMTransformPassPipeline(OpPassManager &passManager) {
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
if (!clEnableLLVMLinalgOnTensors)
nestedModulePM.addPass(createDeclareNumWorkgroupsFnPass());
nestedModulePM.addPass(createInlinerPass());
  // HLO -> Linalg on buffers: either bufferize the experimental
  // Linalg-on-tensors input or convert HLO directly to Linalg on buffers.
if (clEnableLLVMLinalgOnTensors) {
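    // Vectorize linalg ops while still on tensors, before bufferization.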
nestedModulePM.addPass(createLinalgVectorizePass());
    // Use stack allocation on the CPU side.
WorkgroupMemoryAllocationFn allocationFn =
[](OpBuilder &builder, Location loc, ArrayRef<Value> dynamicSizes,
MemRefType allocationType) {
MemRefType allocType = MemRefType::get(
allocationType.getShape(), allocationType.getElementType());
return builder.create<AllocaOp>(loc, allocType, dynamicSizes);
};
addLinalgBufferizePasses(nestedModulePM, allocationFn);
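    // Promote small allocations (up to 1 KiB, rank <= 10) to stack
    // allocations.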
nestedModulePM.addPass(createPromoteBuffersToStackPass(1 << 10, 64, 10));
} else {
    // Propagate dynamic shape computations on tensors.
nestedModulePM.addNestedPass<FuncOp>(Shape::createTieDynamicShapesPass());
nestedModulePM.addNestedPass<FuncOp>(
Shape::createMaterializeShapeCalculationsPass());
nestedModulePM.addNestedPass<FuncOp>(
Shape::createHoistShapeCalculationsPass());
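    // Decompose HLO clamp ops before converting HLO to Linalg on buffers.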
nestedModulePM.addNestedPass<FuncOp>(createDecomposeHLOClampPass());
addHLOToLinalgOnBuffersPasses(nestedModulePM);
}
// Linalg -> LLVM passes.
addLinalgToLLVMPasses(passManager);
}
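// Register the pipelines below so they can be exercised directly from the
// command line.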
static PassPipelineRegistration<> linalgLLVMPipeline(
"iree-codegen-linalg-to-llvm-pipeline",
"Runs the progressive lowering pipeline from Linalg to LLVM",
[](OpPassManager &passManager) {
buildLLVMTransformPassPipeline(passManager);
});
static PassPipelineRegistration<> hloToLinalgLLVMPipeline(
"iree-codegen-hlo-to-llvm-pipeline",
"Runs the progressive lowering pipeline from XLA HLO to Linalg to LLVM",
[](OpPassManager &passManager) {
buildLLVMTransformPassPipeline(passManager);
});
} // namespace iree_compiler
} // namespace mlir