| // Copyright 2019 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include "iree/compiler/Dialect/Flow/Transforms/Passes.h" |
| |
| #include <memory> |
| |
| #include "iree/compiler/Dialect/Util/Transforms/Passes.h" |
| #include "iree/compiler/Utils/PassUtils.h" |
| #include "mlir/Dialect/Func/IR/FuncOps.h" |
| #include "mlir/Dialect/Linalg/Passes.h" |
| #include "mlir/Dialect/MemRef/Transforms/Passes.h" |
| #include "mlir/Pass/PassOptions.h" |
| #include "mlir/Pass/PassRegistry.h" |
| #include "mlir/Transforms/Passes.h" |
| |
| // TODO(ravishankarm): Change to a pipeline option. |
| static llvm::cl::opt<bool> clExportBenchmarkFuncs( |
| "iree-flow-export-benchmark-funcs", |
| llvm::cl::desc( |
| "Exports one function per original module entry point and " |
| "unique flow.executable that dispatches with dummy arguments."), |
| llvm::cl::init(false)); |
| |
// TODO(ravishankarm): Change to a pipeline option.
// NOTE(review): the registered flag name carries a trailing "2"
// ("iree-flow-trace-dispatch-tensors2") while the variable/description do
// not; presumably kept to avoid colliding with an older flag spelling —
// confirm before renaming, as renaming breaks existing command lines.
static llvm::cl::opt<bool> clTraceDispatchTensors(
    "iree-flow-trace-dispatch-tensors2",
    llvm::cl::desc(
        "Trace runtime input/output tensors for each dispatch function."),
    llvm::cl::init(false));
| |
| static llvm::cl::opt<bool> clDemoteF32ToF16( |
| "iree-flow-demote-f32-to-f16", |
| llvm::cl::desc("Convert all f32 ops and values into f16 counterparts " |
| "unconditionally before main flow conversions"), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<bool> clEnableConvToImg2Col( |
| "iree-flow-enable-conv-img2col-transform", |
| llvm::cl::desc("Enable converting convolution ops to img2col form."), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<bool> clEnablePaddingLinalgOps( |
| "iree-flow-enable-padding-linalg-ops", |
| llvm::cl::desc("Enable padding linalg ops to an integer multiple of " |
| "flow-padding-size"), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<bool> clEnableFusePaddingIntoConsumerOps( |
| "iree-flow-enable-fuse-padding-into-consumer-ops", |
| llvm::cl::desc("Enable fusing linalg pad_tensor ops into consumer ops"), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<int> clLinalgOpsPaddingSize( |
| "iree-flow-linalg-ops-padding-size", |
| llvm::cl::desc("Enable padding linalg ops to an integer multiple of " |
| "flow-padding-size"), |
| llvm::cl::init(4)); |
| |
| // TODO(#1159): enable by default or remove this option once it works on |
| // a broader set of programs |
| static llvm::cl::opt<bool> clEnableLinalgDetensorize( |
| "iree-flow-enable-linalg-detensorize", |
| llvm::cl::desc("Enable detensorizing linalg ops to operate on primitives"), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<std::string> clMmt4dTargetOptions( |
| "iree-flow-mmt4d-target-options", |
| llvm::cl::desc("Convert linalg.matmul ops to MMT4D ops targetting the " |
| "given architecture"), |
| llvm::cl::init("")); |
| |
| namespace mlir { |
| namespace iree_compiler { |
| namespace IREE { |
| namespace Flow { |
| |
| namespace { |
| |
// Nests passes under both func.func and util.initializer ops so that
// function-level passes also run over initializer bodies.
using FunctionLikeNest = MultiOpNest<func::FuncOp, IREE::Util::InitializerOp>;
| |
| // Subset of the overall pass pipeline for optimizing globals and numerics. |
| // We may ultimately break this out separately so creating a syntactic |
| // distinction to keep that as an option. |
| void buildGlobalOptimizationPassPipeline( |
| OpPassManager &mainPassManager, const TransformOptions &transformOptions) { |
| OpPassManager pipeline(ModuleOp::getOperationName()); |
| |
| FunctionLikeNest(pipeline) |
| // Simplify util.global accesses early on; this can help with dispatch |
| // region formation as redundant store-loads are removed. |
| .addPass(IREE::Util::createSimplifyGlobalAccessesPass); |
| |
| // Module level cleanup and canonicalization of util.global (and other util |
| // ops). |
| pipeline.addPass(IREE::Util::createApplyPatternsPass()); |
| pipeline.addPass(IREE::Util::createFoldGlobalsPass()); |
| |
| if (transformOptions.constExprHoisting) { |
| pipeline.addPass(IREE::Util::createHoistIntoGlobalsPass()); |
| } |
| |
| if (transformOptions.buildConstEvalPassPipeline) { |
| transformOptions.buildConstEvalPassPipeline(pipeline); |
| } |
| |
| if (transformOptions.numericPrecisionReduction) { |
| pipeline.addPass(createInferNumericNarrowingPass()); |
| pipeline.addPass(createOptimizeNumericsPass()); |
| pipeline.addPass(createCleanupNumericNarrowingPass()); |
| } |
| |
| FunctionLikeNest(pipeline) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass); |
| |
| // Add the whole fixed point iterator. |
| mainPassManager.addPass( |
| IREE::Util::createFixedPointIteratorPass(std::move(pipeline))); |
| } |
| |
| } // namespace |
| |
| void buildFlowTransformPassPipeline(OpPassManager &passManager, |
| const TransformOptions &transformOptions) { |
| // Special case peephole optimizations. |
| FunctionLikeNest(passManager) |
| .addPass(IREE::Flow::createConvertConv2D1x1ToMatmulPass) |
| .addPredicatedPass(clEnableConvToImg2Col, |
| IREE::Flow::createConvertConv2DToImg2ColPass) |
| // Input should now be legal. |
| .addPass(IREE::Flow::createVerifyInputLegalityPass) |
| // Catch matmul ops before we do anything else with them. |
| .addPredicatedPass( |
| !clMmt4dTargetOptions.empty(), |
| []() { |
| return IREE::Flow::createConvertLinalgMatmulToMmt4DPass( |
| clMmt4dTargetOptions); |
| }) |
| // Pad linalg ops |
| .addPredicatedPass(clEnablePaddingLinalgOps, []() { |
| return IREE::Flow::createPadLinalgOpsToIntegerMultiplePass( |
| clLinalgOpsPaddingSize); |
| }); |
| |
| passManager.addPass(mlir::createLinalgNamedOpConversionPass()); |
| |
| // Expand tensor shapes into SSA values and optimize the whole program. |
| // The more we are able to equate shape dimensions at this level the better |
| // our fusions will be. |
| passManager.addPass(IREE::Flow::createExpandTensorShapesPass()); |
| buildGlobalOptimizationPassPipeline(passManager, transformOptions); |
| |
| FunctionLikeNest(passManager) |
| // Pad tensors. |
| .addPredicatedPass((!clEnableFusePaddingIntoConsumerOps), |
| IREE::Flow::createPadTensorToSubTensorInsertPass) |
| |
| // Preprocess the input to a form more amenable for fusion |
| // - Convert all elementwise ops to Linalg |
| // - Remove unit-extent dimensions. |
| .addPass(mlir::createConvertElementwiseToLinalgPass) |
| .addPass(mlir::createLinalgFoldUnitExtentDimsPass) |
| .addPass(createInterchangeGenericOpsPass) |
| .addPass(memref::createResolveShapedTypeResultDimsPass) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass) |
| |
| // Elementwise fusion. |
| .addPass(createFusionOfTensorOpsPass) |
| .addPredicatedPass(clEnableLinalgDetensorize, |
| mlir::createLinalgDetensorizePass) |
| |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass) |
| |
| // Dispatch region formation. |
| // TODO(ravishankarm): Fold ConvertToFlowBefore/ConvertToFlowAfter into |
| // dispatch region formation pass. |
| .addPass(createConvertToFlowBeforeDispatchFormation) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass) |
| .addPass(createDispatchLinalgOnTensorsPass) |
| .addPass(createCaptureDispatchDynamicDimsPass) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(createCSEPass) |
| |
| // Convert remaining ops to Flow ops, after this stage no Linalg ops |
| // should remain. |
| .addPass(createConvertToFlowAfterDispatchFormation) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass); |
| |
| // Module pass to outline the dispatch regions into their own functions |
| // wrapped in executables. |
| passManager.addPass(IREE::Flow::createOutlineDispatchRegionsPass()); |
| |
| // Strip assertions from executables. We could support them with a bunch of |
| // work but our generated executables are designed to be safe in the face of |
| // invalid values and it'd only be useful for debugging. |
| passManager.addNestedPass<IREE::Flow::ExecutableOp>( |
| IREE::Util::createStripDebugOpsPass()); |
| |
| // Cleanup identity ops that clutter up the IR and canonicalize. |
| FunctionLikeNest(passManager).addPass(mlir::createCanonicalizerPass); |
| |
| // Deduplicate executables created from dispatch regions. |
| // Note: this only deduplicates equivalent executables. We could in addition |
| // generalize executables to prune further (e.g. by promoting a dimension to |
| // an argument if two executables differ only in that one dimension). |
| passManager.addPass(IREE::Flow::createDeduplicateExecutablesPass()); |
| |
| // Create one function per remaining flow.executable that can be used with |
| // iree-benchmark-module to benchmark each dispatch individually, as well as |
| // exporting all original model entry points. |
| if (clExportBenchmarkFuncs) { |
| passManager.addPass(IREE::Flow::createExportBenchmarkFuncsPass()); |
| } |
| |
| FunctionLikeNest(passManager) |
| // Inject tracing that logs both input and output tensors from all |
| // dispatches. We do this after deduping so that the executable names |
| // match later stages. |
| .addPredicatedPass(clTraceDispatchTensors, |
| IREE::Flow::createInjectDispatchTracingPass) |
| // Cleanup the IR after we are done. |
| .addPass(IREE::Flow::createCleanupTensorShapesPass) |
| .addPass(mlir::createCanonicalizerPass) |
| .addPass(mlir::createCSEPass); |
| |
| passManager.addNestedPass<IREE::Flow::ExecutableOp>( |
| mlir::createCanonicalizerPass()); |
| passManager.addNestedPass<IREE::Flow::ExecutableOp>(mlir::createCSEPass()); |
| |
| // Symbol DCE any remaining variables/functions that are now no longer |
| // required. |
| passManager.addPass(mlir::createSymbolDCEPass()); |
| } |
| |
| void registerFlowTransformPassPipeline() { |
| PassPipelineRegistration<TransformOptions> transformPassPipeline( |
| "iree-flow-transformation-pipeline", |
| "Runs the full IREE flow dialect transformation pipeline", |
| [](OpPassManager &passManager, const TransformOptions &transformOptions) { |
| buildFlowTransformPassPipeline(passManager, transformOptions); |
| }); |
| } |
| |
namespace {
// Pulls in the tablegen-generated registerPasses() (and per-pass
// registrations) declared in Passes.h.inc.
#define GEN_PASS_REGISTRATION
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc" // IWYU pragma: export
} // namespace
| |
/// Test passes.
std::unique_ptr<OperationPass<void>>
createTestPartitionableLoopsInterfacePass();

/// Register test passes.
/// NOTE(review): this constructs the pass and immediately discards the
/// result; presumably this exists to keep the test pass's symbols linked
/// into the binary rather than to perform registration proper — confirm,
/// as no PassRegistration is visible here.
inline void registerTestPasses() {
  createTestPartitionableLoopsInterfacePass();
}
| |
// Registers all Flow passes and pipelines with the global MLIR pass
// registry: the tablegen-generated passes, the test passes, and the
// named transformation pipeline.
void registerFlowPasses() {
  // Generated.
  registerPasses();

  // Test passes.
  registerTestPasses();

  // Pipelines.
  registerFlowTransformPassPipeline();
}
| |
| } // namespace Flow |
| } // namespace IREE |
| } // namespace iree_compiler |
| } // namespace mlir |