// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"

#include <memory>
#include "iree/compiler/Dialect/Util/Transforms/Passes.h"
#include "iree/compiler/Utils/PassUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Transforms/Passes.h"
// TODO(ravishankarm): Change to a pipeline option.
static llvm::cl::opt<bool> clExportBenchmarkFuncs(
"iree-flow-export-benchmark-funcs",
llvm::cl::desc(
"Exports one function per original module entry point and "
"unique flow.executable that dispatches with dummy arguments."),
llvm::cl::init(false));
// TODO(ravishankarm): Change to a pipeline option.
static llvm::cl::opt<bool> clTraceDispatchTensors(
"iree-flow-trace-dispatch-tensors2",
llvm::cl::desc(
"Trace runtime input/output tensors for each dispatch function."),
llvm::cl::init(false));
static llvm::cl::opt<bool> clDemoteF32ToF16(
"iree-flow-demote-f32-to-f16",
llvm::cl::desc("Convert all f32 ops and values into f16 counterparts "
"unconditionally before main flow conversions"),
llvm::cl::init(false));
static llvm::cl::opt<bool> clEnableConvToImg2Col(
"iree-flow-enable-conv-img2col-transform",
llvm::cl::desc("Enable converting convolution ops to img2col form."),
llvm::cl::init(false));
static llvm::cl::opt<bool> clEnablePaddingLinalgOps(
"iree-flow-enable-padding-linalg-ops",
llvm::cl::desc("Enable padding linalg ops to an integer multiple of "
"flow-padding-size"),
llvm::cl::init(false));
static llvm::cl::opt<bool> clEnableFusePaddingIntoConsumerOps(
"iree-flow-enable-fuse-padding-into-consumer-ops",
llvm::cl::desc("Enable fusing linalg pad_tensor ops into consumer ops"),
llvm::cl::init(false));
static llvm::cl::opt<int> clLinalgOpsPaddingSize(
"iree-flow-linalg-ops-padding-size",
llvm::cl::desc("Enable padding linalg ops to an integer multiple of "
"flow-padding-size"),
llvm::cl::init(4));
// TODO(#1159): enable by default or remove this option once it works on
// a broader set of programs
static llvm::cl::opt<bool> clEnableLinalgDetensorize(
"iree-flow-enable-linalg-detensorize",
llvm::cl::desc("Enable detensorizing linalg ops to operate on primitives"),
llvm::cl::init(false));
static llvm::cl::opt<std::string> clMmt4dTargetOptions(
"iree-flow-mmt4d-target-options",
llvm::cl::desc("Convert linalg.matmul ops to MMT4D ops targetting the "
"given architecture"),
llvm::cl::init(""));
namespace mlir {
namespace iree_compiler {
namespace IREE {
namespace Flow {
namespace {
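// Helper that nests each added pass under both func.func and util.initializer
// ops so function-level passes also run over global initializers.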
using FunctionLikeNest = MultiOpNest<func::FuncOp, IREE::Util::InitializerOp>;
// Subset of the overall pass pipeline for optimizing globals and numerics.
// We may ultimately break this out separately so creating a syntactic
// distinction to keep that as an option.
void buildGlobalOptimizationPassPipeline(
OpPassManager &mainPassManager, const TransformOptions &transformOptions) {
OpPassManager pipeline(ModuleOp::getOperationName());
FunctionLikeNest(pipeline)
// Simplify util.global accesses early on; this can help with dispatch
// region formation as redundant store-loads are removed.
.addPass(IREE::Util::createSimplifyGlobalAccessesPass);
// Module level cleanup and canonicalization of util.global (and other util
// ops).
pipeline.addPass(IREE::Util::createApplyPatternsPass());
pipeline.addPass(IREE::Util::createFoldGlobalsPass());
if (transformOptions.constExprHoisting) {
pipeline.addPass(IREE::Util::createHoistIntoGlobalsPass());
}
if (transformOptions.buildConstEvalPassPipeline) {
transformOptions.buildConstEvalPassPipeline(pipeline);
}
if (transformOptions.numericPrecisionReduction) {
pipeline.addPass(createInferNumericNarrowingPass());
pipeline.addPass(createOptimizeNumericsPass());
pipeline.addPass(createCleanupNumericNarrowingPass());
}
FunctionLikeNest(pipeline)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass);
  // Iterate the sub-pipeline above to a fixed point so the individual
  // cleanups can feed each other.
mainPassManager.addPass(
IREE::Util::createFixedPointIteratorPass(std::move(pipeline)));
}
} // namespace
void buildFlowTransformPassPipeline(OpPassManager &passManager,
const TransformOptions &transformOptions) {
// Special case peephole optimizations.
FunctionLikeNest(passManager)
.addPass(IREE::Flow::createConvertConv2D1x1ToMatmulPass)
.addPredicatedPass(clEnableConvToImg2Col,
IREE::Flow::createConvertConv2DToImg2ColPass)
// Input should now be legal.
.addPass(IREE::Flow::createVerifyInputLegalityPass)
// Catch matmul ops before we do anything else with them.
.addPredicatedPass(
!clMmt4dTargetOptions.empty(),
[]() {
return IREE::Flow::createConvertLinalgMatmulToMmt4DPass(
clMmt4dTargetOptions);
})
      // Pad linalg ops to an integer multiple of the configured padding size.
.addPredicatedPass(clEnablePaddingLinalgOps, []() {
return IREE::Flow::createPadLinalgOpsToIntegerMultiplePass(
clLinalgOpsPaddingSize);
});
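  // Convert named Linalg ops to simpler named-op forms where an equivalent
  // exists (e.g. depthwise convolutions with a unit channel multiplier).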
passManager.addPass(mlir::createLinalgNamedOpConversionPass());
// Expand tensor shapes into SSA values and optimize the whole program.
// The more we are able to equate shape dimensions at this level the better
// our fusions will be.
passManager.addPass(IREE::Flow::createExpandTensorShapesPass());
buildGlobalOptimizationPassPipeline(passManager, transformOptions);
FunctionLikeNest(passManager)
// Pad tensors.
      .addPredicatedPass(!clEnableFusePaddingIntoConsumerOps,
                         IREE::Flow::createPadTensorToSubTensorInsertPass)
// Preprocess the input to a form more amenable for fusion
// - Convert all elementwise ops to Linalg
// - Remove unit-extent dimensions.
.addPass(mlir::createConvertElementwiseToLinalgPass)
.addPass(mlir::createLinalgFoldUnitExtentDimsPass)
.addPass(createInterchangeGenericOpsPass)
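      // Resolve dim ops of op results in terms of their operands where the
      // producing op can reify its result shape.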
.addPass(memref::createResolveShapedTypeResultDimsPass)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
// Elementwise fusion.
.addPass(createFusionOfTensorOpsPass)
.addPredicatedPass(clEnableLinalgDetensorize,
mlir::createLinalgDetensorizePass)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
// Dispatch region formation.
// TODO(ravishankarm): Fold ConvertToFlowBefore/ConvertToFlowAfter into
// dispatch region formation pass.
.addPass(createConvertToFlowBeforeDispatchFormation)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass)
.addPass(createDispatchLinalgOnTensorsPass)
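      // Capture dynamic dimensions of dispatch operands/results so they are
      // available as values inside the dispatch regions.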
.addPass(createCaptureDispatchDynamicDimsPass)
.addPass(mlir::createCanonicalizerPass)
      .addPass(mlir::createCSEPass)
// Convert remaining ops to Flow ops, after this stage no Linalg ops
// should remain.
.addPass(createConvertToFlowAfterDispatchFormation)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass);
// Module pass to outline the dispatch regions into their own functions
// wrapped in executables.
passManager.addPass(IREE::Flow::createOutlineDispatchRegionsPass());
// Strip assertions from executables. We could support them with a bunch of
// work but our generated executables are designed to be safe in the face of
// invalid values and it'd only be useful for debugging.
passManager.addNestedPass<IREE::Flow::ExecutableOp>(
IREE::Util::createStripDebugOpsPass());
// Cleanup identity ops that clutter up the IR and canonicalize.
FunctionLikeNest(passManager).addPass(mlir::createCanonicalizerPass);
// Deduplicate executables created from dispatch regions.
// Note: this only deduplicates equivalent executables. We could in addition
// generalize executables to prune further (e.g. by promoting a dimension to
// an argument if two executables differ only in that one dimension).
passManager.addPass(IREE::Flow::createDeduplicateExecutablesPass());
// Create one function per remaining flow.executable that can be used with
// iree-benchmark-module to benchmark each dispatch individually, as well as
// exporting all original model entry points.
if (clExportBenchmarkFuncs) {
passManager.addPass(IREE::Flow::createExportBenchmarkFuncsPass());
}
FunctionLikeNest(passManager)
// Inject tracing that logs both input and output tensors from all
// dispatches. We do this after deduping so that the executable names
// match later stages.
.addPredicatedPass(clTraceDispatchTensors,
IREE::Flow::createInjectDispatchTracingPass)
// Cleanup the IR after we are done.
.addPass(IREE::Flow::createCleanupTensorShapesPass)
.addPass(mlir::createCanonicalizerPass)
.addPass(mlir::createCSEPass);
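  // Run the same cleanup inside the outlined executables.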
passManager.addNestedPass<IREE::Flow::ExecutableOp>(
mlir::createCanonicalizerPass());
passManager.addNestedPass<IREE::Flow::ExecutableOp>(mlir::createCSEPass());
// Symbol DCE any remaining variables/functions that are now no longer
// required.
passManager.addPass(mlir::createSymbolDCEPass());
}
void registerFlowTransformPassPipeline() {
PassPipelineRegistration<TransformOptions> transformPassPipeline(
"iree-flow-transformation-pipeline",
"Runs the full IREE flow dialect transformation pipeline",
[](OpPassManager &passManager, const TransformOptions &transformOptions) {
buildFlowTransformPassPipeline(passManager, transformOptions);
});
}
namespace {
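// Tablegen-generated pass registration (provides the registerPasses() helper
// used below).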
#define GEN_PASS_REGISTRATION
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc" // IWYU pragma: export
} // namespace
/// Test passes.
std::unique_ptr<OperationPass<void>>
createTestPartitionableLoopsInterfacePass();
/// Register test passes.
inline void registerTestPasses() {
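  // Constructing the pass here references its translation unit so the static
  // pass registration there is not dropped when linking static archives.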
createTestPartitionableLoopsInterfacePass();
}
void registerFlowPasses() {
// Generated.
registerPasses();
// Test passes.
registerTestPasses();
// Pipelines.
registerFlowTransformPassPipeline();
}
} // namespace Flow
} // namespace IREE
} // namespace iree_compiler
} // namespace mlir