Removing a lot of compiler code that was only kept live by VMLA. (#5908)
diff --git a/iree/compiler/Conversion/HLOToHLO/BUILD b/iree/compiler/Conversion/HLOToHLO/BUILD
index e874421..ed1236c 100644
--- a/iree/compiler/Conversion/HLOToHLO/BUILD
+++ b/iree/compiler/Conversion/HLOToHLO/BUILD
@@ -21,7 +21,6 @@
cc_library(
name = "HLOToHLO",
srcs = [
- "DecomposeHLOClamp.cpp",
"DemoteF32ToF16.cpp",
],
hdrs = [
@@ -35,6 +34,5 @@
"@llvm-project//mlir:Pass",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
- "@mlir-hlo//:hlo",
],
)
diff --git a/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt b/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
index acd6747..68eeb96 100644
--- a/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
+++ b/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
@@ -16,7 +16,6 @@
HDRS
"Passes.h"
SRCS
- "DecomposeHLOClamp.cpp"
"DemoteF32ToF16.cpp"
DEPS
LLVMSupport
@@ -26,7 +25,6 @@
MLIRTransformUtils
iree::compiler::Dialect::Flow::IR
iree::compiler::Dialect::IREE::IR
- tensorflow::mlir_hlo
PUBLIC
)
diff --git a/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp b/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp
deleted file mode 100644
index d294d3e..0000000
--- a/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-namespace {
-/// A pattern to decompose mhlo.clamp ops into mhlo.compare and
-/// mhlo.select ops.
-class DecomposeClampOp : public OpRewritePattern<mhlo::ClampOp> {
- public:
- using OpRewritePattern<mhlo::ClampOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(mhlo::ClampOp op,
- PatternRewriter &rewriter) const override {
- auto minType = op.min().getType().dyn_cast<RankedTensorType>();
- auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
- auto maxType = op.max().getType().dyn_cast<RankedTensorType>();
-
- if (!operandType) return failure();
-
- // Reject implicitly broadcasted cases. They should be made explicit first.
- if (minType != operandType || maxType != operandType) return failure();
-
- // clamp(a, x, b) = min(max(a, x), b)
- Location loc = op.getLoc();
- Value cmpMin = rewriter.create<mhlo::CompareOp>(
- loc, op.min(), op.operand(), rewriter.getStringAttr("LT"));
- Value selectMin = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMin,
- op.operand(), op.min());
- Value cmpMax = rewriter.create<mhlo::CompareOp>(
- loc, selectMin, op.max(), rewriter.getStringAttr("LT"));
- Value selectMax = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMax,
- selectMin, op.max());
- rewriter.replaceOp(op, selectMax);
- return success();
- }
-};
-
-struct DecomposeHLOClampPass
- : public PassWrapper<DecomposeHLOClampPass, FunctionPass> {
- void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<mhlo::MhloDialect>();
- }
-
- void runOnFunction() override {
- MLIRContext *context = &getContext();
- OwningRewritePatternList patterns(&getContext());
- patterns.insert<DecomposeClampOp>(context);
- (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
- }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> createDecomposeHLOClampPass() {
- return std::make_unique<DecomposeHLOClampPass>();
-}
-
-static PassRegistration<DecomposeHLOClampPass> pass(
- "iree-codegen-decompose-hlo-clamp",
- "Decompose HLO clamp op into primitive ops");
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp b/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
index d465f5f..8a11b0b 100644
--- a/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
+++ b/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
diff --git a/iree/compiler/Conversion/HLOToHLO/Passes.h b/iree/compiler/Conversion/HLOToHLO/Passes.h
index 5edbcb2..d0da837 100644
--- a/iree/compiler/Conversion/HLOToHLO/Passes.h
+++ b/iree/compiler/Conversion/HLOToHLO/Passes.h
@@ -30,9 +30,6 @@
namespace mlir {
namespace iree_compiler {
-/// Creates a pass to decompose XLA-HLO clamp ops into primitive ops.
-std::unique_ptr<OperationPass<FuncOp>> createDecomposeHLOClampPass();
-
/// Create a pass to convert a model using f32 type to the equivalent one
/// using f16.
std::unique_ptr<OperationPass<ModuleOp>> createDemoteF32ToF16Pass();
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/BUILD b/iree/compiler/Conversion/HLOToLinalg/test/BUILD
index 063de7e..2998812 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/BUILD
+++ b/iree/compiler/Conversion/HLOToLinalg/test/BUILD
@@ -28,7 +28,6 @@
srcs = enforce_glob(
[
"concatenate.mlir",
- "decompose_hlo_clamp.mlir",
"dynamic_shape.mlir",
"fft.mlir",
"fusion.mlir",
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt b/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
index af81009..f4fc612 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
+++ b/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
@@ -15,7 +15,6 @@
lit
SRCS
"concatenate.mlir"
- "decompose_hlo_clamp.mlir"
"dynamic_shape.mlir"
"fft.mlir"
"fusion.mlir"
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir b/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
deleted file mode 100644
index a706486..0000000
--- a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-codegen-decompose-hlo-clamp %s | IreeFileCheck %s
-
-// CHECK-LABEL: func @clamp
-// CHECK-SAME: (%[[MIN:.+]]: tensor<4xf32>, %[[INPUT:.+]]: tensor<4xf32>, %[[MAX:.+]]: tensor<4xf32>)
-func @clamp(%min: tensor<4xf32>, %value: tensor<4xf32>, %max: tensor<4xf32>) -> tensor<4xf32> {
- // CHECK: %[[CMP_MIN:.+]] = "mhlo.compare"(%[[MIN]], %[[INPUT]]) {comparison_direction = "LT"}
- // CHECK: %[[SLT_MIN:.+]] = "mhlo.select"(%[[CMP_MIN]], %[[INPUT]], %[[MIN]])
- // CHECK: %[[CMP_MAX:.+]] = "mhlo.compare"(%[[SLT_MIN]], %[[MAX]]) {comparison_direction = "LT"}
- // CHECK: %[[SLT_MAX:.+]] = "mhlo.select"(%[[CMP_MAX]], %[[SLT_MIN]], %[[MAX]])
- // CHECK: return %[[SLT_MAX]]
- %0 = "mhlo.clamp"(%min, %value, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
-}
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 42e8e0b..c5453c1 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -44,7 +44,6 @@
inline void registerHLOToLinalgPasses() {
static bool init_once = []() {
- createDecomposeHLOClampPass();
createHLOToLinalgOnTensorsPass();
createDemoteF32ToF16Pass();
return true;
diff --git a/iree/compiler/Dialect/Flow/Analysis/BUILD b/iree/compiler/Dialect/Flow/Analysis/BUILD
deleted file mode 100644
index ccda835..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/BUILD
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-package(
- default_visibility = ["//visibility:public"],
- features = ["layering_check"],
- licenses = ["notice"], # Apache 2.0
-)
-
-cc_library(
- name = "Analysis",
- srcs = [
- "Dispatchability.cpp",
- "DispatchabilityTest.cpp",
- ],
- hdrs = [
- "Dispatchability.h",
- "TestPasses.h",
- ],
- deps = [
- "//iree/compiler/Dialect/Flow/IR",
- "//iree/compiler/Dialect/Flow/Utils",
- "//iree/compiler/Dialect/IREE/IR",
- "@llvm-project//llvm:Support",
- "@llvm-project//mlir:IR",
- "@llvm-project//mlir:Pass",
- "@llvm-project//mlir:StandardOps",
- "@llvm-project//mlir:Support",
- "@mlir-hlo//:hlo",
- ],
-)
diff --git a/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt b/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt
deleted file mode 100644
index db8f0fc..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
-# iree/compiler/Dialect/Flow/Analysis/BUILD #
-# #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
-# CMake-only content. #
-# #
-# To disable autogeneration for this file entirely, delete this header. #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_cc_library(
- NAME
- Analysis
- HDRS
- "Dispatchability.h"
- "TestPasses.h"
- SRCS
- "Dispatchability.cpp"
- "DispatchabilityTest.cpp"
- DEPS
- LLVMSupport
- MLIRIR
- MLIRPass
- MLIRStandard
- MLIRSupport
- iree::compiler::Dialect::Flow::IR
- iree::compiler::Dialect::Flow::Utils
- iree::compiler::Dialect::IREE::IR
- tensorflow::mlir_hlo
- PUBLIC
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
deleted file mode 100644
index 53b2b6f..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-
-#include <list>
-
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "llvm/ADT/SetVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/SymbolTable.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// static
-LogicalResult Dispatchability::annotateIR(ModuleOp moduleOp) {
- Dispatchability dispatchability;
- if (failed(dispatchability.recalculate(moduleOp))) {
- moduleOp.emitOpError()
- << "failed to analyze dispatchability for the module";
- return failure();
- }
-
- Builder builder(moduleOp.getContext());
- SymbolTable symbolTable(moduleOp);
- for (auto &funcDispatchability : dispatchability.funcDispatchability_) {
- auto funcOp = symbolTable.lookup<FuncOp>(funcDispatchability.first);
- funcOp->setAttr("dispatchable",
- builder.getBoolAttr(funcDispatchability.second));
- }
-
- return success();
-}
-
-LogicalResult Dispatchability::recalculate(ModuleOp moduleOp) {
- funcDispatchability_.clear();
- funcCloneModuleOp_ = ModuleOp::create(UnknownLoc::get(moduleOp.getContext()));
- funcClones_.clear();
-
- // Run through all functions until we are able to compute their
- // dispatchability. We do this so that we can determine if calls are allowed.
- OpBuilder cloneBuilder(funcCloneModuleOp_.get());
- std::vector<FuncOp> nextWorklist(moduleOp.getOps<FuncOp>().begin(),
- moduleOp.getOps<FuncOp>().end());
- std::vector<FuncOp> worklist;
- bool anyChanged;
- do {
- anyChanged = false;
- worklist.swap(nextWorklist);
- nextWorklist.clear();
- for (auto funcOp : worklist) {
- auto isDispatchable = computeDispatchability(funcOp);
- if (isDispatchable.hasValue()) {
- funcDispatchability_[funcOp.getName()] = isDispatchable.getValue();
- if (isDispatchable.getValue()) {
- auto clonedFuncOp = cast<FuncOp>(cloneBuilder.clone(*funcOp));
- funcClones_[funcOp.getName()] = clonedFuncOp;
- funcCloneModuleOp_->push_back(clonedFuncOp);
- }
- anyChanged = true;
- } else {
- nextWorklist.push_back(funcOp);
- }
- }
- } while (anyChanged);
- if (!nextWorklist.empty()) {
- return moduleOp.emitError() << "cycle detected in dispatchability analysis";
- }
-
- return success();
-}
-
-Optional<bool> Dispatchability::computeDispatchability(FuncOp funcOp) {
- if (funcOp.isExternal()) {
- // We assume all imports have side-effects right now, but that may not be
- // the case. We should add an attribute and check for it.
- return false;
- }
-
- // TODO(b/144530470): replace with tablegen attributes/interfaces.
- for (auto &block : funcOp.getBlocks()) {
- for (auto &op : block.getOperations()) {
- if (!IREE::Flow::isOpOfKnownDialect(&op)) {
- // Custom dialects aren't dispatchable (yet).
- return false;
- } else if (auto callOp = dyn_cast<CallOp>(op)) {
- if (callOp.getCallee() == funcOp.getName()) {
- // Recursion.
- continue;
- }
- auto it = funcDispatchability_.find(callOp.callee());
- if (it == funcDispatchability_.end()) {
- // Not yet calculated - yield.
- return llvm::None;
- }
- return it->second;
- } else if (isa<CallIndirectOp>(op)) {
- // Indirect calls are not supported and must first be devirtualized.
- return false;
- } else if (isa<mlir::ReturnOp>(op)) {
- // TODO(benvanik): widen to all known terminators? sometimes they may
- // have side-effects.
- continue;
- } else if (isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op)) {
- // Some unfusable ops must remain on their own.
- return false;
- } else if (isa<mhlo::ReduceOp>(op) || isa<mhlo::ReduceWindowOp>(op)) {
- // Reductions always become flow ops.
- return false;
-
- // TODO: Properly handle region side effects.
- } else if (!MemoryEffectOpInterface::hasNoEffect(&op) ||
- op.getNumRegions() != 0) {
- // Ops with side-effects cannot be dispatched as we must be able to
- // exactly model I/O.
- return false;
- }
- }
- }
-
- // All cases not handled above are (probably) dispatchable. This makes what we
- // do here a blocklist, though as we move towards more frontend dialects that
- // may not be the best idea.
- return true;
-}
-
-void Dispatchability::walkDispatchableOps(
- function_ref<void(FuncOp funcOp)> fn) {
- for (auto funcOp : funcClones_) {
- fn(funcOp.second);
- }
-}
-
-bool Dispatchability::isDispatchable(StringRef funcName) {
- return funcDispatchability_[funcName];
-}
-
-bool Dispatchability::isDispatchable(FuncOp funcOp) {
- return isDispatchable(funcOp.getName());
-}
-
-bool Dispatchability::isInvalidated(
- const AnalysisManager::PreservedAnalyses &pa) {
- return false;
-}
-
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h
deleted file mode 100644
index bfbb4f9..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
-#define IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
-
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/AnalysisManager.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/LogicalResult.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Analyzes functions in a module to determine whether they can be performed as
-// part of a dispatch operation. Functions must meet a set of criteria defining
-// "dispatchability" such as the lack of side effects.
-class Dispatchability {
- public:
- // Annotates the IR with the dispatchability information. This is only
- // required if the dispatchability information is interesting to persist
- // beyond transformation, such as in tests.
- static LogicalResult annotateIR(ModuleOp moduleOp);
-
- Dispatchability() = default;
- explicit Dispatchability(Operation *op) {
- (void)recalculate(cast<ModuleOp>(op));
- }
- Dispatchability(Dispatchability &&) = default;
- Dispatchability &operator=(Dispatchability &&) = default;
- Dispatchability(const Dispatchability &) = delete;
- Dispatchability &operator=(const Dispatchability &) = delete;
-
- // Recalculates the dispatchability information for the given module.
- LogicalResult recalculate(ModuleOp moduleOp);
-
- // Calls |fn| for each dispatchable function.
- void walkDispatchableOps(function_ref<void(FuncOp funcOp)> fn);
-
- // Returns true if |funcOp| is dispatchable.
- bool isDispatchable(StringRef funcName);
- bool isDispatchable(FuncOp funcOp);
- bool isInvalidated(const AnalysisManager::PreservedAnalyses &pa);
-
- private:
- // Returns true if the given function is dispatch compatible.
- // Returns None if the dispatchability can't yet be calculated as dependent
- // functions have not been processed.
- Optional<bool> computeDispatchability(FuncOp funcOp);
-
- DenseMap<StringRef, bool> funcDispatchability_;
- OwningModuleRef funcCloneModuleOp_;
- DenseMap<StringRef, FuncOp> funcClones_;
-};
-
-} // namespace iree_compiler
-} // namespace mlir
-
-#endif // IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
diff --git a/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp b/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp
deleted file mode 100644
index a25b9b1..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-class DispatchabilityTestPass
- : public PassWrapper<DispatchabilityTestPass, OperationPass<ModuleOp>> {
- public:
- void runOnOperation() override {
- if (failed(Dispatchability::annotateIR(getOperation()))) {
- signalPassFailure();
- }
- }
-};
-
-namespace IREE {
-namespace Flow {
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityTestPass() {
- return std::make_unique<DispatchabilityTestPass>();
-}
-} // namespace Flow
-} // namespace IREE
-
-static PassRegistration<DispatchabilityTestPass> pass(
- "test-iree-flow-dispatchability",
- "Test pass used for dispatchability analysis");
-
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Analysis/TestPasses.h b/iree/compiler/Dialect/Flow/Analysis/TestPasses.h
deleted file mode 100644
index 4e54995..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/TestPasses.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
-#define IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Support/LLVM.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-//===----------------------------------------------------------------------===//
-// Test passes
-//===----------------------------------------------------------------------===//
-
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityTestPass();
-
-//===----------------------------------------------------------------------===//
-// Register all analysis passes
-//===----------------------------------------------------------------------===//
-
-inline void registerFlowAnalysisTestPasses() {
- createDispatchabilityTestPass();
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
-
-#endif // IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
\ No newline at end of file
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/BUILD b/iree/compiler/Dialect/Flow/Analysis/test/BUILD
deleted file mode 100644
index 8f34c74..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/BUILD
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-load("//iree:lit_test.bzl", "iree_lit_test_suite")
-load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
-
-package(
- default_visibility = ["//visibility:public"],
- features = ["layering_check"],
- licenses = ["notice"], # Apache 2.0
-)
-
-iree_lit_test_suite(
- name = "lit",
- srcs = enforce_glob(
- ["dispatchability.mlir"],
- include = ["*.mlir"],
- ),
- data = [
- "//iree/tools:IreeFileCheck",
- "//iree/tools:iree-opt",
- ],
-)
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt
deleted file mode 100644
index 26aafe0..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
-# iree/compiler/Dialect/Flow/Analysis/test/BUILD #
-# #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
-# CMake-only content. #
-# #
-# To disable autogeneration for this file entirely, delete this header. #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_lit_test_suite(
- NAME
- lit
- SRCS
- "dispatchability.mlir"
- DATA
- iree::tools::IreeFileCheck
- iree::tools::iree-opt
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir b/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
deleted file mode 100644
index 9fabb11..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
+++ /dev/null
@@ -1,92 +0,0 @@
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file -test-iree-flow-dispatchability %s | IreeFileCheck %s
-
-// CHECK-LABEL: @empty
-// CHECK-SAME: dispatchable = true
-func @empty() {
- return
-}
-
-// -----
-
-// CHECK-LABEL: @customOp
-// CHECK-SAME: dispatchable = false
-func @customOp() {
- "do.foo"() : () -> ()
- return
-}
-
-// -----
-
-// CHECK-LABEL: @simpleMath
-// CHECK-SAME: dispatchable = true
-func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @stdElementwiseOps
-// CHECK-SAME: dispatchable = true
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = addf %arg0, %arg0 : tensor<4xf32>
- %1 = subf %0, %arg0 : tensor<4xf32>
- %2 = mulf %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @hloElementwiseOps
-// CHECK-SAME: dispatchable = true
-func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @interleavedDot
-// CHECK-SAME: dispatchable = false
-func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
- %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
- return %2 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @caller
-// CHECK-SAME: dispatchable = true
-func @caller(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
-}
-// CHECK-LABEL: func @callee
-// CHECK-SAME: dispatchable = true
-func @callee(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
- return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @dotCaller
-// CHECK-SAME: dispatchable = false
-func @dotCaller(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
- %1 = call @dotCallee(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
- return %2 : tensor<4x4xf32>
-}
-// CHECK-LABEL: func @dotCallee
-// CHECK-SAME: dispatchable = false
-func @dotCallee(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %0 = "mhlo.dot"(%arg0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index e1e3d54..8c0a10a 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -350,11 +350,6 @@
// Dispatch ops
//===----------------------------------------------------------------------===//
-void DispatchRegionOp::getCanonicalizationPatterns(
- OwningRewritePatternList &results, MLIRContext *context) {
- results.insert<ClosureOptimizationPattern<DispatchRegionOp>>(context);
-}
-
void DispatchWorkgroupsOp::getCanonicalizationPatterns(
OwningRewritePatternList &results, MLIRContext *context) {
results.insert<ClosureOptimizationPattern<DispatchWorkgroupsOp>>(context);
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h b/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
index 2e2da8a..f38170b 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
+++ b/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
@@ -53,13 +53,6 @@
// performed on the op. Returns true if the op was optimized.
bool optimizeClosureLikeOp(ClosureOpInterface &closureOp,
PatternRewriter *rewriter = nullptr);
-template <typename T>
-inline bool optimizeClosureOp(T &op, PatternRewriter *rewriter = nullptr) {
- auto closureOp = cast<ClosureOpInterface>(op.getOperation());
- bool didOptimize = optimizeClosureLikeOp(closureOp, rewriter);
- op = dyn_cast_or_null<DispatchRegionOp>(closureOp.getOperation());
- return didOptimize;
-}
// A pattern that optimizes the given region-containing op T (CSE, DCE, etc).
// Duplicate operands will be combined and unused operands and results will be
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index 51f409e..6fb8e8f 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -561,316 +561,6 @@
}
//===----------------------------------------------------------------------===//
-// flow.dispatch.region
-//===----------------------------------------------------------------------===//
-
-/// Inlines operation |op| into |dispatchRegionOp|. Any operands of |op|, as
-/// well as any values implicitly captured by its regions, that are defined
-/// outside the dispatch region are added as operands of the dispatch region.
-static Operation *inlineOpIntoDispatchRegion(OpBuilder &builder,
- DispatchRegionOp dispatchRegionOp,
- Operation *op,
- BlockAndValueMapping &map) {
- llvm::SetVector<Value> capturedInputs(op->getOperands().begin(),
- op->getOperands().end());
- getUsedValuesDefinedAbove(op->getRegions(), capturedInputs);
- Block *block = builder.getInsertionBlock();
- for (Value capturedInput : capturedInputs) {
- if (map.contains(capturedInput)) continue;
- dispatchRegionOp.getOperation()->insertOperands(
- dispatchRegionOp.getOperation()->getNumOperands(), {capturedInput});
- Value newBlockArgument = block->addArgument(capturedInput.getType());
- map.map(capturedInput, newBlockArgument);
- }
-
- return builder.clone(*op, map);
-}
-
-llvm::Optional<std::pair<DispatchRegionOp, Operation *>>
-DispatchRegionOp::formFromAnchorOp(Value workload, Operation *anchorOp,
- OpBuilder &builder) {
- builder.setInsertionPoint(anchorOp);
- auto loc = anchorOp->getLoc();
- // Map anchor into new dispatch region.
- auto drOp = builder.create<DispatchRegionOp>(
- loc, llvm::to_vector<1>(anchorOp->getResultTypes()), workload,
- ArrayRef<Value>());
- auto *drBlock = new Block();
- drOp.body().push_back(drBlock);
- BlockAndValueMapping mapping;
- builder.setInsertionPointToEnd(drBlock);
- Operation *newAnchorOp =
- inlineOpIntoDispatchRegion(builder, drOp, anchorOp, mapping);
-
- // Insert terminator
- builder.create<IREE::Flow::ReturnOp>(loc, newAnchorOp->getResults());
-
- // Replace anchor uses with region result.
- for (auto it : llvm::enumerate(anchorOp->getResults())) {
- it.value().replaceAllUsesWith(drOp.getResult(it.index()));
- }
- anchorOp->erase();
- return std::make_pair(drOp, newAnchorOp);
-}
-
-// Clones an operation with new result types.
-// The original operation will be erased and a new operation constructed
-// in its place.
-static Operation *cloneWithNewResultTypes(Operation *op,
- TypeRange newResultTypes) {
- OperationState state(op->getLoc(), op->getName());
- state.addOperands(op->getOperands());
- state.addTypes(newResultTypes);
- state.addSuccessors(op->getSuccessors());
- state.addAttributes(op->getAttrs());
- for (unsigned i = 0, e = op->getNumRegions(); i < e; ++i) {
- state.addRegion();
- }
- Operation *newOp = Operation::create(state);
- for (unsigned i = 0, e = op->getNumRegions(); i < e; ++i) {
- newOp->getRegion(i).takeBody(op->getRegion(i));
- }
- return newOp;
-}
-
-ResultRange DispatchRegionOp::appendResults(DispatchRegionOp &self,
- ValueRange addlResults,
- OpBuilder &builder) {
- Block &block = self.body().front();
-
- unsigned origNumResults = self.getNumResults();
- llvm::SmallVector<Type, 4> newTypes(self.getResultTypes().begin(),
- self.getResultTypes().end());
- for (auto r : addlResults) newTypes.push_back(r.getType());
-
- // Changing the arity of the results requires replacing the dispatch region.
- builder.setInsertionPoint(self);
- auto newDrOp = llvm::cast<DispatchRegionOp>(
- builder.insert(cloneWithNewResultTypes(self, newTypes)));
- self.replaceAllUsesWith(newDrOp->getResults().take_front(origNumResults));
- self.erase();
- self = newDrOp;
-
- // Add results to the terminator.
- auto terminator = block.getTerminator();
- llvm::SmallVector<Value, 4> returns(terminator->getOperands());
- returns.append(addlResults.begin(), addlResults.end());
- terminator->setOperands(returns);
-
- return self->getResults().slice(origNumResults, addlResults.size());
-}
-
-Operation *DispatchRegionOp::inlineOp(Operation *origOp, OpBuilder &builder,
- bool positionAtEnd) {
- Block &block = body().front();
- if (positionAtEnd) {
- builder.setInsertionPoint(block.getTerminator());
- } else {
- builder.setInsertionPointToStart(&block);
- }
- // Map existing dr args.
- BlockAndValueMapping mapping;
- for (unsigned i = 0, e = block.getNumArguments(); i < e; ++i) {
- mapping.map(args()[i], block.getArgument(i));
- }
-
- // Also map any terminator operands to support inlining at the end.
- for (auto it : llvm::enumerate(block.getTerminator()->getOperands())) {
- mapping.map(getResult(it.index()), it.value());
- }
-
- // Remember the values corresponding to original op results.
- llvm::SmallVector<Value, 4> origOpResultValues;
- for (Value result : origOp->getResults()) {
- origOpResultValues.push_back(mapping.lookupOrNull(result));
- }
-
- Operation *inlinedOp =
- inlineOpIntoDispatchRegion(builder, *this, origOp, mapping);
-
- // Replace any results from the orig with results from the clone.
- for (unsigned i = 0, e = origOp->getNumResults(); i < e; ++i) {
- Value resultTo = origOpResultValues[i];
- if (resultTo) {
- resultTo.replaceAllUsesWith(inlinedOp->getResult(i));
- }
- }
-
- return inlinedOp;
-}
-
-void DispatchRegionOp::build(OpBuilder &builder, OperationState &state,
- ArrayRef<Type> resultTypes, Value workload,
- ValueRange args,
- ArrayRef<NamedAttribute> attributes) {
- state.addTypes(resultTypes);
- state.addOperands({workload});
- state.addOperands(args);
- state.addAttributes(attributes);
- state.addRegion();
-}
-
-ParseResult parseDispatchRegionOp(OpAsmParser &parser, OperationState *result) {
- // Parse required workload.
- OpAsmParser::OperandType workloadArg;
- Type workloadArgType;
- if (failed(parser.parseLSquare()) ||
- failed(parser.parseOperand(workloadArg)) ||
- failed(parser.parseColonType(workloadArgType)) ||
- failed(parser.parseRSquare()) ||
- failed(parser.resolveOperand(workloadArg, workloadArgType,
- result->operands))) {
- return failure();
- }
-
- // Parse (optional) args.
- SmallVector<OpAsmParser::OperandType, 16> regionArgs;
- SmallVector<Type, 16> regionArgTypes;
- if (failed(parser.parseLParen())) {
- return failure();
- }
- if (failed(parser.parseOptionalRParen())) {
- SmallVector<OpAsmParser::OperandType, 16> regionOperands;
- auto argsLoc = parser.getCurrentLocation();
- do {
- // Reserve entries in the lists.
- regionArgs.emplace_back();
- regionOperands.emplace_back();
- regionArgTypes.emplace_back();
- if (failed(parser.parseRegionArgument(regionArgs.back())) ||
- failed(parser.parseEqual()) ||
- failed(parser.parseOperand(regionOperands.back())) ||
- failed(parser.parseColonType(regionArgTypes.back()))) {
- return failure();
- }
- } while (succeeded(parser.parseOptionalComma()));
- if (failed(parser.parseRParen()) ||
- failed(parser.resolveOperands(regionOperands, regionArgTypes, argsLoc,
- result->operands))) {
- return failure();
- }
- }
-
- // Parse (optional) results.
- if (failed(parser.parseOptionalArrowTypeList(result->types))) {
- return failure();
- }
-
- // Parse region body.
- Region *body = result->addRegion();
- if (failed(parser.parseRegion(*body, regionArgs, regionArgTypes)) ||
- failed(parser.parseOptionalAttrDict(result->attributes))) {
- return failure();
- }
- return success();
-}
-
-void printDispatchRegionOp(OpAsmPrinter &p, DispatchRegionOp op) {
- p << op.getOperationName();
-
- // Print the workload argument.
- p << "[";
- p.printOperand(op.workload());
- p << " : ";
- p.printType(op.workload().getType());
- p << "]";
-
- // Print the data argument remapping.
- p << "(";
- interleaveComma(llvm::zip(op.body().getArguments(), op.args()), p,
- [&](std::tuple<BlockArgument, Value> it) {
- p << std::get<0>(it) << " = " << std::get<1>(it);
- p << " : ";
- p << std::get<1>(it).getType();
- });
- p << ")";
-
- // Print the result types, if any.
- if (op.getNumResults() > 0) {
- p << " -> (";
- interleaveComma(op.getResultTypes(), p);
- p << ")";
- }
-
- p.printRegion(op.body(), /*printEntryBlockArgs=*/false);
- p.printOptionalAttrDict(op->getAttrs(),
- /*elidedAttrs=*/{});
-}
-
-Operation::operand_range DispatchRegionOp::getClosureOperands() {
- return args();
-}
-
-Operation::result_range DispatchRegionOp::getClosureResults() {
- return results();
-}
-
-// TODO(#4897): allow non-splat constants - current paths can't handle them.
-static bool canDispatchRegionContainOpIssue4897(Operation *op) {
- if (auto constantOp = dyn_cast<ConstantOp>(op)) {
- auto constantValueAttr = constantOp.getValue();
- auto constantType = constantOp.getType();
- if (constantValueAttr.isa<SplatElementsAttr>()) {
- return true;
- } else if (auto denseAttr =
- constantValueAttr.dyn_cast<DenseElementsAttr>()) {
- return denseAttr.isSplat();
- } else if (constantType.isIntOrIndexOrFloat()) {
- return true;
- }
- }
- return false;
-}
-
-// Inline operations that the dispatch region can handle natively.
-static bool canDispatchRegionContainOp(Operation *op) {
- // Inline constant operations that are splat or small constants.
- if (auto constantOp = dyn_cast<ConstantOp>(op)) {
- auto constantValueAttr = constantOp.getValue();
- auto constantType = constantOp.getType();
- if (constantValueAttr.isa<SplatElementsAttr>()) {
- return true;
- } else if (auto denseAttr =
- constantValueAttr.dyn_cast<DenseElementsAttr>()) {
- // TODO(GH-4897): Non-splat constants seem to have an issue on the LLVM
- // side. Uncomment after that is fixed.
- auto shapedType = constantOp.getType().cast<ShapedType>();
- uint64_t estimatedByteLength =
- (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) /
- 8;
- return denseAttr.isSplat() ||
- estimatedByteLength <= clInlineConstantByteLength;
- } else if (constantType.isIntOrIndexOrFloat()) {
- return true;
- }
- }
- return false;
-}
-
-bool DispatchRegionOp::canClosureContainOp(Operation *op) {
- return canDispatchRegionContainOpIssue4897(op);
-}
-
-ClosureOpInterface
-DispatchRegionOp::cloneReplacementExcludingOperandsAndResults(
- ArrayRef<unsigned> excludedOperandIndices,
- ArrayRef<unsigned> excludedResultIndices) {
- SmallVector<Type, 4> newResultTypes = llvm::to_vector<4>(getResultTypes());
- SmallVector<Value, 4> newOperandsValues = llvm::to_vector<4>(args());
- excludeClosureOperandsAndResults(newOperandsValues, excludedOperandIndices,
- newResultTypes, excludedResultIndices);
- auto newOp = OpBuilder(getContext())
- .create<DispatchRegionOp>(getLoc(), newResultTypes,
- workload(), newOperandsValues,
- getOperation()->getAttrs());
- auto &newBody = newOp.getClosureBodyRegion();
- newBody.takeBody(getClosureBodyRegion());
- eraseRegionResults(newBody, excludedResultIndices);
- newBody.front().eraseArguments(excludedOperandIndices);
- return newOp;
-}
-
-//===----------------------------------------------------------------------===//
// flow.dispatch.tensor.load
//===----------------------------------------------------------------------===//
@@ -1092,6 +782,31 @@
return results();
}
+// Inline operations that the dispatch region can handle natively.
+static bool canDispatchRegionContainOp(Operation *op) {
+ // Inline constant operations that are splat or small constants.
+ if (auto constantOp = dyn_cast<ConstantOp>(op)) {
+ auto constantValueAttr = constantOp.getValue();
+ auto constantType = constantOp.getType();
+ if (constantValueAttr.isa<SplatElementsAttr>()) {
+ return true;
+ } else if (auto denseAttr =
+ constantValueAttr.dyn_cast<DenseElementsAttr>()) {
+ // TODO(GH-4897): Non-splat constants seem to have an issue on the LLVM
+ // side. Uncomment after that is fixed.
+ auto shapedType = constantOp.getType().cast<ShapedType>();
+ uint64_t estimatedByteLength =
+ (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) /
+ 8;
+ return denseAttr.isSplat() ||
+ estimatedByteLength <= clInlineConstantByteLength;
+ } else if (constantType.isIntOrIndexOrFloat()) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool DispatchWorkgroupsOp::canClosureContainOp(Operation *op) {
return canDispatchRegionContainOp(op);
}
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td
index d946a36..7910ae4 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -174,76 +174,6 @@
// Partitioned regions
//===----------------------------------------------------------------------===//
-def FLOW_DispatchRegionOp : FLOW_PureOp<"dispatch.region", [
- IsolatedFromAbove,
- DeclareOpInterfaceMethods<FLOW_ClosureOpInterface>,
- ]> {
- let summary = [{partitioned region representing a dispatched workload}];
- let description = [{
- A closure that represents a functional dispatch unit. These perform
- computations in a way that can be lowered to target executable formats such
- as SPIR-V for execution.
-
- Ops that are identified as "dispatchable" are grouped into dispatch regions
- and compatible dispatch regions are folded together. What remains outside of
- the dispatch regions is the glue required to schedule the work (commonly
- referred to as "host" code, even if it doesn't run on an AP).
-
- Dispatch regions are modeled using value semantics: it is assumed that all
- arguments are read-only and that the dispatch regions themselves have no
- side-effects.
- }];
-
- let arguments = (ins
- FLOW_Workload:$workload,
- Variadic<AnyType>:$args
- );
- let results = (outs
- Variadic<AnyType>:$results
- );
-
- let regions = (region AnyRegion:$body);
-
- let extraClassDeclaration = [{
- /// Forms a dispatch region around a given anchor operation, returning
- /// the new DispatchRegionOp and anchor operation within the region.
- /// Returns llvm::None on failure.
- /// The insertion point of the OpBuilder will be modified.
- static llvm::Optional<std::pair<DispatchRegionOp, Operation *>>
- formFromAnchorOp(Value workload, Operation *anchorOp,
- OpBuilder &builder);
-
- // Appends results to the dispatch region. This will re-allocate the
- // DispatchRegionOp itself but preserve the contained body block.
- // Returns a ResultRange for the new dispatch region op's results
- // corresponding to addlResults.
- static ResultRange appendResults(
- DispatchRegionOp &self, ValueRange addlResults, OpBuilder &builder);
-
- /// Returns the index of the args() operand in the Operation operands list.
- unsigned mapArgOperandToOpOperand(unsigned i) { return i + 1; }
-
- /// Inlines an op into the dispatch region.
- /// By default, this will inline the op at the beginning of the region.
- /// Set positionAtEnd=true to inline at the end. This is not a general
- /// IR splicing helper: it can only inline ops with inputs that map to
- /// either captured operands or results and is used to coalesce an op
- /// into an adjacent dispatch region.
- /// Note that the original op is cloned but not erased. It is up to the
- /// caller to cleanup the original op as needed.
- Operation *inlineOp(Operation *origOp, OpBuilder &builder,
- bool positionAtEnd=false);
- }];
-
- let skipDefaultBuilders = 1;
- let builders = [
- OpBuilder<(ins "ArrayRef<Type>":$resultTypes, "Value":$workload,
- "ValueRange":$args, CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
- ];
-
- let hasCanonicalizer = 1;
-}
-
def FLOW_DispatchWorkgroupsOp : FLOW_PureOp<"dispatch.workgroups", [
IsolatedFromAbove,
AttrSizedOperandSegments,
diff --git a/iree/compiler/Dialect/Flow/IR/test/BUILD b/iree/compiler/Dialect/Flow/IR/test/BUILD
index e7c420a..997322b 100644
--- a/iree/compiler/Dialect/Flow/IR/test/BUILD
+++ b/iree/compiler/Dialect/Flow/IR/test/BUILD
@@ -26,8 +26,6 @@
srcs = enforce_glob(
[
"dispatch_ops.mlir",
- "dispatch_region_folding.mlir",
- "dispatch_regions.mlir",
"dispatch_tensor_folding.mlir",
"dispatch_workgroups.mlir",
"dispatch_workgroups_folding.mlir",
diff --git a/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
index 0c2f8a3..7ad3dd3 100644
--- a/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
@@ -15,8 +15,6 @@
lit
SRCS
"dispatch_ops.mlir"
- "dispatch_region_folding.mlir"
- "dispatch_regions.mlir"
"dispatch_tensor_folding.mlir"
"dispatch_workgroups.mlir"
"dispatch_workgroups_folding.mlir"
diff --git a/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir
deleted file mode 100644
index fb678b8..0000000
--- a/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: iree-opt -split-input-file -canonicalize %s | iree-opt -split-input-file | IreeFileCheck %s
-
-// CHECK-LABEL: @dceOperandsAndResults
-func @dceOperandsAndResults(%arg0 : tensor<?xf32>) -> (tensor<?xf32>) {
- // CHECK: %[[WORKLOAD:.+]] = constant 5
- %workload = constant 5 : index
- // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD]] : index]
- // CHECK-SAME: (%[[CA1:.+]] = %arg0 : tensor<?xf32>) -> (tensor<?xf32>)
- // CHECK: %[[DR0:.+]] = addf %[[CA1]], %[[CA1]]
- // CHECK: flow.return %[[DR0]] : tensor<?xf32>
- %ret0, %ret1 = flow.dispatch.region[%workload : index](
- %i0 = %arg0 : tensor<?xf32>, %i1 = %arg0 : tensor<?xf32>, %i2 = %arg0 : tensor<?xf32>)
- -> (tensor<?xf32>, tensor<?xf32>) {
- %1 = addf %i0, %i1 : tensor<?xf32>
- flow.return %1, %i2 : tensor<?xf32>, tensor<?xf32>
- }
- // CHECK: return %[[R0]] : tensor<?xf32>
- return %ret0 : tensor<?xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir b/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir
deleted file mode 100644
index d06d20c..0000000
--- a/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir
+++ /dev/null
@@ -1,65 +0,0 @@
-// Tests printing and parsing of dispatch region ops.
-
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s | iree-opt -allow-unregistered-dialect -split-input-file | IreeFileCheck %s
-
-// CHECK-LABEL: @singleArg
-func @singleArg(%arg0 : tensor<?xf32>) {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
- // CHECK-NEXT: flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) {
- // CHECK-NEXT: flow.return
- // CHECK-NEXT: }
- %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
- flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) {
- flow.return
- }
- // CHECK-NEXT: return
- return
-}
-
-// -----
-
-// CHECK-LABEL: @multipleArgs
-func @multipleArgs(%arg0 : tensor<?xf32>, %arg1 : tensor<?xf32>) {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
- // CHECK-NEXT: flow.dispatch.region[%[[WORKLOAD]] : index](%arg2 = %arg0 : tensor<?xf32>, %arg3 = %arg1 : tensor<?xf32>) {
- // CHECK-NEXT: flow.return
- // CHECK-NEXT: }
- %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
- flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>, %i1 = %arg1 : tensor<?xf32>) {
- flow.return
- }
- // CHECK-NEXT: return
- return
-}
-
-// -----
-
-// CHECK-LABEL: @singleResult
-func @singleResult(%arg0 : tensor<?xf32>) -> tensor<?xf32> {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
- // CHECK-NEXT: %1 = flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>) {
- // CHECK-NEXT: flow.return %arg1 : tensor<?xf32>
- // CHECK-NEXT: }
- %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
- %ret0 = flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) -> tensor<?xf32> {
- flow.return %i0 : tensor<?xf32>
- }
- // CHECK-NEXT: return %1 : tensor<?xf32>
- return %ret0 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @multipleResults
-func @multipleResults(%arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
- // CHECK-NEXT: %1:2 = flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
- // CHECK-NEXT: flow.return %arg1, %arg1 : tensor<?xf32>, tensor<?xf32>
- // CHECK-NEXT: }
- %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
- %ret0, %ret1 = flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
- flow.return %i0, %i0 : tensor<?xf32>, tensor<?xf32>
- }
- // CHECK-NEXT: return %1#0, %1#1 : tensor<?xf32>, tensor<?xf32>
- return %ret0, %ret1 : tensor<?xf32>, tensor<?xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/BUILD b/iree/compiler/Dialect/Flow/Transforms/BUILD
index 8d42e05..9b75f91 100644
--- a/iree/compiler/Dialect/Flow/Transforms/BUILD
+++ b/iree/compiler/Dialect/Flow/Transforms/BUILD
@@ -42,19 +42,14 @@
"ConvertToFlowTensorOps.cpp",
"DeduplicateExecutables.cpp",
"DestructiveUpdateUtils.cpp",
- "DispatchConfig.cpp",
"DispatchLinalgOnTensors.cpp",
- "DispatchabilityAnalysis.cpp",
"ExpandVariableDynamicDims.cpp",
"ExportBenchmarkFuncs.cpp",
- "FoldCompatibleDispatchRegions.cpp",
"FormStreams.cpp",
"HLOToHLOPreprocessing.cpp",
"HoistUnstreamableOps.cpp",
- "IdentifyDispatchRegions2.cpp",
"InjectDispatchTracing.cpp",
"LegalizeInputTypes.cpp",
- "OutlineDispatchRegions.cpp",
"OutlineDispatchRegions2.cpp",
"OutlineLargeConstants.cpp",
"PassDetail.h",
@@ -64,7 +59,6 @@
],
hdrs = [
"DestructiveUpdateUtils.h",
- "DispatchConfig.h",
"Passes.h",
"Passes.h.inc",
],
@@ -73,7 +67,6 @@
"//iree/compiler/Conversion/HLOToHLO",
"//iree/compiler/Conversion/HLOToLinalg:HLOToLinalgOnTensors",
"//iree/compiler/Conversion/LinalgToLinalg",
- "//iree/compiler/Dialect/Flow/Analysis",
"//iree/compiler/Dialect/Flow/Conversion",
"//iree/compiler/Dialect/Flow/Conversion/HLOToFlow",
"//iree/compiler/Dialect/Flow/Conversion/StandardToFlow",
diff --git a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
index b5c489c..5fbe0ea 100644
--- a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
@@ -24,26 +24,20 @@
Transforms
HDRS
"DestructiveUpdateUtils.h"
- "DispatchConfig.h"
"Passes.h"
"Passes.h.inc"
SRCS
"ConvertToFlowTensorOps.cpp"
"DeduplicateExecutables.cpp"
"DestructiveUpdateUtils.cpp"
- "DispatchConfig.cpp"
"DispatchLinalgOnTensors.cpp"
- "DispatchabilityAnalysis.cpp"
"ExpandVariableDynamicDims.cpp"
"ExportBenchmarkFuncs.cpp"
- "FoldCompatibleDispatchRegions.cpp"
"FormStreams.cpp"
"HLOToHLOPreprocessing.cpp"
"HoistUnstreamableOps.cpp"
- "IdentifyDispatchRegions2.cpp"
"InjectDispatchTracing.cpp"
"LegalizeInputTypes.cpp"
- "OutlineDispatchRegions.cpp"
"OutlineDispatchRegions2.cpp"
"OutlineLargeConstants.cpp"
"PassDetail.h"
@@ -72,7 +66,6 @@
iree::compiler::Conversion::HLOToHLO
iree::compiler::Conversion::HLOToLinalg::HLOToLinalgOnTensors
iree::compiler::Conversion::LinalgToLinalg
- iree::compiler::Dialect::Flow::Analysis
iree::compiler::Dialect::Flow::Conversion
iree::compiler::Dialect::Flow::Conversion::HLOToFlow
iree::compiler::Dialect::Flow::Conversion::StandardToFlow
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
deleted file mode 100644
index bd315a6..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-
-#define DEBUG_TYPE "iree-detail"
-
-static llvm::cl::opt<bool> clEnableConsumerOnlyFusion(
- "iree-enable-consumer-only-fusion",
- llvm::cl::desc("Flag to enable fusion of matmul, etc. with its consumers, "
- "experimental flag to evaluate fusion"),
- llvm::cl::init(false));
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-// TODO(laurenzo): Every one of these should have better support and be
-// removed from this exclusion list eventually.
-// Allowlist of ops that materialize to an index-permuted copy of some kind
-// if they exist standalone. Generally we try to avoid anchoring on these,
-// letting them fuse into more meaningful ops where possible.
-bool isIndexOp(Operation *op) {
- // TODO(laurenzo): Curate this list more specifically (or have a better
- // mechanism for determining).
- return isa<Shape::RankedBroadcastInDimOp>(op) ||
- isa<mhlo::BroadcastInDimOp>(op) || isa<mhlo::BroadcastOp>(op) ||
- isa<mhlo::DynamicBroadcastInDimOp>(op) ||
- isa<mhlo::DynamicReshapeOp>(op) || isa<mhlo::DynamicSliceOp>(op) ||
- isa<mhlo::SliceOp>(op) || isa<mhlo::TransposeOp>(op);
-}
-
-/// Returns true if |lhs| and |rhs| return a single value with the same shape.
-bool checkSameOutputShape(Operation *lhs, Operation *rhs) {
- if (lhs->getNumResults() != 1 || rhs->getNumResults() != 1) {
- return false;
- }
- ShapedType lhsType = lhs->getResults()[0].getType().dyn_cast<ShapedType>();
- ShapedType rhsType = rhs->getResults()[0].getType().dyn_cast<ShapedType>();
- if (!lhsType || !rhsType || lhsType != rhsType) {
- return false;
- }
- // The static shapes match, but if one of the ops is a transpose the shapes
- // might not actually match, especially in the dynamic case where the shapes
- // might be different at runtime.
- if (isa<mhlo::TransposeOp>(lhs) || isa<mhlo::TransposeOp>(rhs)) {
- return false;
- }
- return true;
-}
-} // namespace
-
-//------------------------------------------------------------------------------
-// OpDispatchPolicy
-//------------------------------------------------------------------------------
-
-bool OpDispatchPolicy::isDispatchable(Operation *op) {
- if (FlowDialect::isDialectOp(op)) {
- // Ignore things we've already produced as they should only relate to
- // sequencer operations.
- LLVM_DEBUG(llvm::dbgs() << " NOT DISPATCHABLE (Flow Dialect): "
- << op->getName() << "\n");
- return false;
- } else if (op->hasTrait<OpTrait::IsTerminator>()) {
- // Currently we skip all terminators as we want to leave them in the block
- // to keep it valid. Future folding passes may take care of them if they are
- // worth bringing into the dispatch region.
- LLVM_DEBUG(llvm::dbgs() << " NOT DISPATCHABLE (Known Terminator): "
- << op->getName() << "\n");
- return false;
- } else if (auto callOp = dyn_cast<CallOp>(op)) {
- bool dispatchable = dispatchability.isDispatchable(callOp.getCallee());
- LLVM_DEBUG(llvm::dbgs()
- << " " << (dispatchable ? "" : "NOT ")
- << "DISPATCHABLE (Call): " << op->getName() << "\n");
- return dispatchable;
- } else if (isa<CallIndirectOp>(op)) {
- // Indirect calls are not supported in dispatch code.
- LLVM_DEBUG(llvm::dbgs() << " NOT DISPATCHABLE (Call Indirect): "
- << op->getName() << "\n");
- return false;
- } else if (isa<ConstantOp>(op)) {
- // Constants are handled in the RematerializeDispatchConstants pass.
- // We do that independently so that we can more easily see the use of
- // constants across all dispatches instead of just on an individual basis
- // as we do here.
- LLVM_DEBUG(llvm::dbgs()
- << " NOT DISPATCHABLE (Constant): " << op->getName() << "\n");
- return false;
- } else if (op->getNumResults() &&
- !op->getResult(0).getType().isa<ShapedType>()) {
- // We don't put scalar manipulation into dispatch regions.
- LLVM_DEBUG(llvm::dbgs()
- << " NOT DISPATCHABLE (Non Shaped): " << op->getName() << "\n");
- return false;
- } else if (!isOpOfKnownDialect(op)) {
- // Probably a custom op.
- LLVM_DEBUG(llvm::dbgs() << " NOT DISPATCHABLE (Unknown Dialect): "
- << op->getName() << "\n");
- return false;
- }
- LLVM_DEBUG(llvm::dbgs() << " DISPATCHABLE: " << op->getName() << "\n");
- return true;
-}
-
-bool OpDispatchPolicy::isIdentityMetadata(Operation *op) {
- return isa<linalg::InitTensorOp, Shape::TieShapeOp, Shape::MakeRankedShapeOp>(
- op);
-}
-
-bool OpDispatchPolicy::isViewModificationOp(Operation *op) {
- return isa<mhlo::ReshapeOp, linalg::TensorReshapeOp>(op);
-}
-
-int OpDispatchPolicy::getAnchorBenefit(Operation *op) {
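- // Ops that cannot fuse with their producers are given the highest benefit so
- // that dispatch regions are formed around them first.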
- if (isUnsupportedFusionOp(op) || isFusableWithConsumersOnly(op)) {
- return 100;
- }
-
- if (isIdentityMetadata(op)) {
- // Cannot anchor.
- return 0;
- } else if (isViewModificationOp(op)) {
- return 1;
- } else if (isIndexOp(op)) {
- // We generally do not want to form anchors around ops that just do a copy
- // (perhaps with an affine map) except as a last resort.
- return 5;
- } else if (isa<mhlo::SelectOp>(op)) {
- // TODO(#2050): In a number of cases, this makes it less likely to split
- // a dispatch region across a compare/select boundary. Remove this once i1
- // is legalized properly.
- return 15;
- } else {
- // Most dispatchable ops can anchor but are a fairly low benefit.
- return 10;
- }
-}
-
-OpDispatchPolicy::FusionType OpDispatchPolicy::fuseInput(Operation *anchorOp,
- Operation *inputOp) {
- if (inputOp->hasTrait<OpTrait::IsTerminator>()) return FusionType::DISABLED;
-
- if (isIdentityMetadata(inputOp) || isViewModificationOp(inputOp)) {
- // Shape ties must always be duplicated into the region and remain in their
- // original position. This should apply to any such "metadata" ops.
- return FusionType::CLONE_INTO;
- }
- if (isUnsupportedFusionOp(anchorOp) || isUnsupportedFusionOp(inputOp)) {
- return FusionType::DISABLED;
- }
- if (isFusableWithConsumersOnly(anchorOp)) {
- return FusionType::DISABLED;
- }
-
- // By default for operands, they are duplicated into the dispatch region.
- // Typically at the initial fusion stage, there is not a sufficient cost
- // model to determine whether it is more beneficial to fuse or materialize,
- // so the bias is towards fusion and leaving inter-region analysis to a later
- // phase.
- return FusionType::CLONE_INTO;
-}
-
-OpDispatchPolicy::FusionType OpDispatchPolicy::fuseOutput(Operation *anchorOp,
- Operation *outputOp) {
- if (outputOp->hasTrait<OpTrait::IsTerminator>() ||
- outputOp->getNumResults() == 0) {
- return FusionType::DISABLED;
- }
- if (isIdentityMetadata(outputOp) || isViewModificationOp(outputOp)) {
- return FusionType::MOVE_INTO;
- }
-
- if (isUnsupportedFusionOp(anchorOp) || isUnsupportedFusionOp(outputOp)) {
- return FusionType::DISABLED;
- }
- if (isFusableWithConsumersOnly(anchorOp) &&
- !isFusableWithConsumersOnly(outputOp)) {
- if (isFusableWithConsumerOfSameOutputShapeOnly(anchorOp) &&
- !checkSameOutputShape(anchorOp, outputOp)) {
- return FusionType::DISABLED;
- }
- return FusionType::MOVE_INTO;
- }
-
- // Generally, it is hard to reason locally about the legality of fusing an
- // output, since additional analysis may need to be done to determine
- // workload compatibility (especially with dynamic shapes involved). As
- // such, we do as little as possible here and instead rely on optimization
- // passes to merge compatible regions.
- return FusionType::DISABLED;
-}
-
-bool OpDispatchPolicy::isFusableWithConsumerOfSameOutputShapeOnly(
- Operation *op) {
- return clEnableConsumerOnlyFusion && isa<mhlo::DotOp, mhlo::DotGeneralOp>(op);
-}
-
-bool OpDispatchPolicy::isFusableWithConsumersOnly(Operation *op) {
- return isFusableWithConsumerOfSameOutputShapeOnly(op);
-}
-
-// TODO(b/144530470): replace with tablegen attributes/interfaces.
-bool OpDispatchPolicy::isUnsupportedFusionOp(Operation *op) {
- return isa<linalg::IndexedGenericOp, linalg::GenericOp, mhlo::ConcatenateOp,
- mhlo::ConvOp, mhlo::PadOp, mhlo::ReduceOp, mhlo::ReduceWindowOp,
- mhlo::SliceOp>(op) ||
- (!clEnableConsumerOnlyFusion &&
- isa<mhlo::DotOp, mhlo::DotGeneralOp>(op)) ||
- isLeafOnlyOp(op);
-}
-
-bool OpDispatchPolicy::isLeafOnlyOp(Operation *op) {
- return isa<mhlo::TorchIndexSelectOp>(op);
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h
deleted file mode 100644
index 6d82d22..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "mlir/IR/Operation.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-// Queries dispatch options for an operation.
-// This is presently mostly a hard-coded set of heuristics but should expand
-// to be based on both queries of new op interfaces and a cost model.
-class OpDispatchPolicy {
- public:
- // The benefit that selecting an anchor is expected to provide. Anchors
- // with higher benefit should be formed first.
- using AnchorBenefit = int;
- enum class FusionType {
- // Fusion is disallowed.
- DISABLED = 0,
- // The operation should be duped into the dispatch region.
- CLONE_INTO = 1,
- // The operation should be cloned into the dispatch region and have its
- // uses redirected to the dispatch region's results.
- MOVE_INTO = 3,
- };
-
- OpDispatchPolicy(Dispatchability &dispatchability)
- : dispatchability(dispatchability) {}
-
- // Returns true if |op| is only fusable with its consumers.
- static bool isFusableWithConsumersOnly(Operation *op);
-
- // Returns true if |op| is only fusable with consumers that have the same
- // output shape as |op|.
- static bool isFusableWithConsumerOfSameOutputShapeOnly(Operation *op);
-
- // Returns true if |op| can be treated as a view modification, i.e. it
- // eventually lowers to an op that only changes how the underlying buffer
- // holding the values is viewed w.r.t. shape, etc.
- static bool isViewModificationOp(Operation *op);
-
- // Returns true if |op| is not able to fuse with either producer or consumer.
- static bool isUnsupportedFusionOp(Operation *op);
-
- // Returns true if |op| can only be a leaf op.
- static bool isLeafOnlyOp(Operation *op);
-
- // Returns true if the given |op| can be dispatched in all cases.
- // Other passes may handle special cases of these ops but this initial
- // identification is conservative.
- bool isDispatchable(Operation *op);
-
- // Returns true if the op is an "identity metadata" op that must be preserved
- // at use-def boundaries. Such ops are non-executable, with >= 1 operands and
- // one result, where the result is assumed to be operand(0) with any
- // op-specific metadata attached.
- bool isIdentityMetadata(Operation *op);
-
- // Returns the benefit of treating the given op as an anchor to form a
- // dispatch region around, where <= 0 disables the ability of the op to
- // be an anchor.
- // Anchors are identified greedily by sorting in descending order of
- // anchor benefit and ascending topological order (i.e. all ops with the
- // highest benefit have a dispatch region greedily formed around them
- // prior to proceeding to ops with the next lowest benefit).
- //
- // It is only valid to call this for dispatchable ops.
- AnchorBenefit getAnchorBenefit(Operation *op);
-
- // Returns the type of fusion that can be done for an input op that feeds
- // into a given anchor op.
- FusionType fuseInput(Operation *anchorOp, Operation *inputOp);
-
- // Returns the type of fusion that can be done for an output op that
- // follows an anchor op.
- FusionType fuseOutput(Operation *anchorOp, Operation *outputOp);
-
- private:
- Dispatchability &dispatchability;
-};
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp
deleted file mode 100644
index cfc09b9..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-class DispatchabilityAnalysisPass
- : public DispatchabilityAnalysisBase<DispatchabilityAnalysisPass> {
- public:
- DispatchabilityAnalysisPass() = default;
-
- void runOnOperation() override {
- // Force creation (or caching) of dispatchability information.
- auto &dispatchability = getAnalysis<Dispatchability>();
- markAllAnalysesPreserved();
-
- // Build the dispatchable func table.
- if (dispatchableFuncOps_) {
- dispatchability.walkDispatchableOps([&](FuncOp funcOp) {
- (*dispatchableFuncOps_)[funcOp.getName()] = funcOp;
- });
- }
- }
-
- std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps_;
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityAnalysisPass() {
- return std::make_unique<DispatchabilityAnalysisPass>();
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp b/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
index a0c9cdd..5022da5 100644
--- a/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
@@ -27,12 +27,10 @@
namespace IREE {
namespace Flow {
-// Exports two kind of benchmark functions:
-// - Creates exported functions to invoke each executable op.
-// - Clones each exported functions (including those just created) with
-// placeholder constant inputs instead of arguments and removes the
-// exported attribute from the old functions.
-// The input are provided using flow.variable and flow.lookup.
+// Clones each exported function with placeholder constant inputs instead of
+// arguments and removes the exported attribute from the old functions so that
+// only the generated benchmark functions remain exported.
+// The inputs are provided using flow.variable ops.
class ExportBenchmarkFuncsPass
: public ExportBenchmarkFuncsBase<ExportBenchmarkFuncsPass> {
public:
@@ -51,11 +49,6 @@
for (auto entryFuncOp : entryFuncOps) {
createEntryPointBenchmarkFunc(moduleOp, entryFuncOp);
}
-
- // Create one benchmark function per entry point in each flow.executable.
- for (auto executableOp : moduleOp.getOps<IREE::Flow::ExecutableOp>()) {
- createExecutableBenchmarkFunc(moduleOp, executableOp);
- }
}
private:
@@ -122,63 +115,6 @@
entryFuncOp.setPrivate();
}
- void createExecutableBenchmarkFunc(ModuleOp moduleOp,
- IREE::Flow::ExecutableOp executableOp) {
- OpBuilder moduleBuilder(&getContext());
- moduleBuilder.setInsertionPointAfter(executableOp);
- for (auto& op : executableOp.getBlock()) {
- auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op);
- if (!dispatchEntryOp) continue;
- auto execFuncOp = executableOp.getInnerModule().lookupSymbol<FuncOp>(
- dispatchEntryOp.function_ref());
- Location loc = execFuncOp.getLoc();
-
- // Create one dummy input variable per input.
- SmallVector<IREE::Flow::VariableOp, 4> dummyInputVariableOps;
- for (auto inputType : execFuncOp.getType().getInputs()) {
- dummyInputVariableOps.push_back(
- createDummyInputVariableOp(loc, inputType, moduleBuilder));
- }
-
- // Create a `() -> ()` entry point op the benchmark tool can run.
- std::string funcName = std::string(execFuncOp.getName()) + "_benchmark";
- auto funcType = moduleBuilder.getFunctionType({}, {});
- auto funcOp = moduleBuilder.create<FuncOp>(loc, funcName, funcType);
- funcOp->setAttr("iree.module.export", moduleBuilder.getUnitAttr());
- funcOp->setAttr("iree.abi.stub", moduleBuilder.getUnitAttr());
- SmallVector<NamedAttribute> reflectionAttrs = {
- moduleBuilder.getNamedAttr("benchmark",
- moduleBuilder.getStringAttr("dispatch")),
- };
- funcOp->setAttr("iree.reflection",
- moduleBuilder.getDictionaryAttr(reflectionAttrs));
- Block* block = funcOp.addEntryBlock();
-
- // Build the body of the FuncOp.
- auto blockBuilder = OpBuilder(block, block->begin());
- SmallVector<Value, 4> args;
- for (auto variableOp : dummyInputVariableOps) {
- args.push_back(blockBuilder.createOrFold<IREE::Flow::VariableLoadOp>(
- loc, variableOp));
- }
-
- // TODO(hanchung): Use a real workload instead? We can probably
- // calculate the workload from the results.
- auto dummyWorkload = blockBuilder.create<ConstantIndexOp>(loc, 0);
- auto dispatchOp = blockBuilder.create<DispatchOp>(
- loc, dispatchEntryOp, ValueRange{dummyWorkload},
- execFuncOp.getType().getResults(), ValueRange{}, args, ValueRange{},
- ArrayRef<int64_t>{});
-
- // Sink all results with do_not_optimize to ensure that DCE does not
- // remove the dispatch.
- for (auto result : dispatchOp.getResults()) {
- blockBuilder.create<IREE::DoNotOptimizeOp>(loc, result);
- }
- blockBuilder.create<mlir::ReturnOp>(loc);
- }
- }
-
int uniqueId = 0;
};
diff --git a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
deleted file mode 100644
index 08ed480..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
+++ /dev/null
@@ -1,416 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/LogicalResult.h"
-#include "mlir/Transforms/Utils.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-// Replaces |returnOp| with a clone including |newOperands| appended.
-LogicalResult appendReturnOperands(IREE::Flow::ReturnOp returnOp,
- ArrayRef<Value> newOperands) {
- // Insert prior to the original return.
- OpBuilder builder(returnOp);
-
- // Clone with new args.
- SmallVector<Value, 8> operands;
- operands.reserve(returnOp.getNumOperands() + newOperands.size());
- operands.append(returnOp.operand_begin(), returnOp.operand_end());
- operands.append(newOperands.begin(), newOperands.end());
- builder.create<IREE::Flow::ReturnOp>(returnOp.getLoc(), operands);
-
- // Remove original.
- returnOp.erase();
-
- return success();
-}
-
-// Replaces |regionOp| with a clone including |newArgs| and |newResults|.
-DispatchRegionOp appendRegionArgsAndResults(DispatchRegionOp ®ionOp,
- ArrayRef<Value> newArgs,
- ArrayRef<Value> newResults,
- Location otherLoc) {
- // Insert prior to the original region.
- OpBuilder builder(regionOp);
-
- // Location is original region + new region location (both probably fused).
- SmallVector<Location, 2> fusedLocs = {regionOp.getLoc(), otherLoc};
- auto fusedLoc = FusedLoc::get(regionOp.getContext(), fusedLocs);
-
- // Clone with new results.
- SmallVector<Value, 8> operands;
- operands.append(regionOp.args().begin(), regionOp.args().end());
- operands.append(newArgs.begin(), newArgs.end());
- SmallVector<Type, 8> resultTypes;
- resultTypes.append(regionOp.result_type_begin(), regionOp.result_type_end());
- for (auto newResult : newResults) {
- resultTypes.push_back(newResult.getType());
- }
- auto newRegionOp = builder.create<DispatchRegionOp>(
- fusedLoc, resultTypes, regionOp.workload(), operands,
- regionOp->getAttrs());
- newRegionOp.body().takeBody(regionOp.body());
-
- // Replace uses of original values with the new values.
- for (int i = 0; i < regionOp.getNumResults(); ++i) {
- regionOp.getResult(i).replaceAllUsesWith(newRegionOp.getResult(i));
- }
-
- // Erase the original region.
- regionOp.erase();
-
- return newRegionOp;
-}
-
-// Removes results that are not used from the dispatch region.
-// Returns the new operation. There may be unused ops in the region but DCE
-// should take care of that later.
-DispatchRegionOp removeUnusedResults(DispatchRegionOp regionOp) {
- // Find return value within the region.
- auto ®ionBlock = regionOp.body().getBlocks().front();
- auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(regionBlock.getTerminator());
- if (!returnOp) {
- regionBlock.getParent()->getParentOfType<FuncOp>().emitError()
-     << "block does not contain a flow.return op";
- }
-
- // Calculate new return values.
- SmallVector<Type, 8> newReturnTypes;
- SmallVector<Value, 8> newReturnValues;
- SmallVector<Value, 8> newRegionResults;
- for (int i = 0; i < returnOp.getNumOperands(); ++i) {
- auto resultValue = regionOp.getResult(i);
- if (!resultValue.use_empty()) {
- // Still has uses so we will preserve it.
- newReturnTypes.push_back(resultValue.getType());
- newReturnValues.push_back(returnOp.getOperand(i));
- newRegionResults.push_back(resultValue);
- }
- }
-
- // Update return op operands. We can do this in-place as we are only shrinking
- // the list.
- returnOp.getOperation()->setOperands(newReturnValues);
-
- // Insert prior to the original region.
- OpBuilder builder(regionOp);
-
- // Clone with new results.
- auto newRegionOp = builder.create<DispatchRegionOp>(
- regionOp.getLoc(), newReturnTypes, regionOp.workload(), regionOp.args(),
- regionOp->getAttrs());
- newRegionOp.body().takeBody(regionOp.body());
-
- // Replace uses of original values with the new values.
- for (int i = 0; i < newRegionResults.size(); ++i) {
- newRegionResults[i].replaceAllUsesWith(newRegionOp.getResult(i));
- }
-
- // Erase the original region.
- regionOp.erase();
-
- return newRegionOp;
-}
-
-// Returns true if |lhs| and |rhs| have either an identical workload or one that
-// is compatible.
-bool areDispatchRegionWorkloadsCompatible(DispatchRegionOp &lhs,
- DispatchRegionOp &rhs) {
- // TODO(benvanik): more sophisticated checking; right now it's just identical.
- return lhs.workload() == rhs.workload();
-}
-
-// Returns true if |value| depends in any way on |op| through any path.
-bool doesValueDependOnOperation(Value value, Operation *op) {
- if (!value.getDefiningOp()) {
- return false;
- } else if (value.getDefiningOp() == op) {
- return true;
- } else if (value.getDefiningOp()->getBlock() == op->getBlock() &&
- value.getDefiningOp()->isBeforeInBlock(op)) {
- // |value| is defined before |op| in the same block, so it cannot depend on
- // |op|.
- return false;
- } else if (value.getDefiningOp()->getBlock() == op->getBlock() &&
- !value.getDefiningOp()->isBeforeInBlock(op)) {
- // |value| is defined after |op| in the same block; conservatively assume a
- // dependence.
- return true;
- }
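- // Otherwise recurse through the operands of |value|'s defining op.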
- for (auto operand : value.getDefiningOp()->getOperands()) {
- if (doesValueDependOnOperation(operand, op)) {
- return true;
- }
- }
- return false;
-}
-
-// Returns true if any operand of |rhs| transitively depends on a result of
-// |lhs| through intermediate ops. |rhs| may depend directly on the results of
-// |lhs| but no other ops in the parent block will use those results prior to
-// |rhs|.
-bool areDispatchRegionsTransitivelyDependent(DispatchRegionOp &lhs,
- DispatchRegionOp &rhs) {
- for (auto arg : rhs.args()) {
- if (arg.getDefiningOp() != lhs && doesValueDependOnOperation(arg, lhs)) {
- // Transitively dependent - boo - can't merge yet.
- return true;
- }
- }
- return false;
-}
-
-// Returns true if the dispatch region contains only a single block.
-// This is because our merge isn't very smart and will not preserve the CFG
-// right now. We can fix this when needed.
-bool isDispatchRegionMergable(DispatchRegionOp ®ionOp) {
- // Disallow merging of dispatch regions containing matmuls and other big ops.
- // We do this to allow backends to lower the big op as entirely isolated such
- // that substituting library calls is easier.
- for (auto &block : regionOp.body().getBlocks()) {
- for (auto &op : block) {
- // A leaf only op is mergable.
- if ((OpDispatchPolicy::isUnsupportedFusionOp(&op) ||
- OpDispatchPolicy::isFusableWithConsumersOnly(&op)) &&
- !OpDispatchPolicy::isLeafOnlyOp(&op)) {
- return false;
- }
- }
- }
- return regionOp.body().getBlocks().size() == 1;
-}
-
-// Returns true if |rhs| contains ops that can only be leaf ops and would lose
-// that property if the two dispatch regions were merged.
-bool rhsHasLeafOnlyOp(DispatchRegionOp &lhs, DispatchRegionOp &rhs) {
- auto &rhsBlock = rhs.body().front();
- auto lhsArgs = llvm::to_vector<8>(lhs.args());
- auto rhsArgs = llvm::to_vector<8>(rhs.args());
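- // For each rhs block argument that is fed by an lhs result, check whether
- // any of its users inside rhs is a leaf-only op.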
- for (int rhsOpIdx = 0; rhsOpIdx < rhsArgs.size(); ++rhsOpIdx) {
- for (int lhsResultIdx = 0; lhsResultIdx < lhs.getNumResults();
- ++lhsResultIdx) {
- if (rhsArgs[rhsOpIdx] != lhs.getResult(lhsResultIdx)) continue;
- for (auto *user : rhsBlock.getArgument(rhsOpIdx).getUsers()) {
- if (OpDispatchPolicy::isLeafOnlyOp(user)) return true;
- }
- }
- }
- return false;
-}
-
-// Merges |rhs| into |lhs| and returns the new |lhs| op.
-// Precondition: !areDispatchRegionsTransitivelyDependent
-DispatchRegionOp mergeDispatchRegions(DispatchRegionOp &lhs,
- DispatchRegionOp &rhs) {
- auto &lhsBlock = lhs.body().front();
- auto &rhsBlock = rhs.body().front();
-
- // Find the values used as return values in the lhs.
- // We'll need to replace the uses in rhs with these.
- auto lhsReturnOp = cast<IREE::Flow::ReturnOp>(lhsBlock.getTerminator());
- SmallVector<Value, 8> lhsReturnValues;
- lhsReturnValues.reserve(lhsReturnOp.getNumOperands());
- lhsReturnValues.append(lhsReturnOp.operand_begin(),
- lhsReturnOp.operand_end());
-
- // Find the values used as return values in the rhs.
- // We'll add these to the results of the lhs region.
- auto rhsReturnOp = cast<IREE::Flow::ReturnOp>(rhsBlock.getTerminator());
- SmallVector<Value, 8> rhsReturnValues;
- rhsReturnValues.reserve(rhsReturnOp.getNumOperands());
- rhsReturnValues.append(rhsReturnOp.operand_begin(),
- rhsReturnOp.operand_end());
-
- // Compute new args.
- BlockAndValueMapping mapping;
- SmallVector<Value, 8> newArgs;
- auto lhsArgs = llvm::to_vector<8>(lhs.args());
- auto rhsArgs = llvm::to_vector<8>(rhs.args());
- for (int rhsOpIdx = 0; rhsOpIdx < rhsArgs.size(); ++rhsOpIdx) {
- bool didElide = false;
- // Find if the rhs arg already exists on the lhs and dedupe.
- for (int lhsOpIdx = 0; lhsOpIdx < lhsArgs.size(); ++lhsOpIdx) {
- if (rhsArgs[rhsOpIdx] == lhsArgs[lhsOpIdx]) {
- mapping.map(rhsBlock.getArgument(rhsOpIdx),
- lhsBlock.getArgument(lhsOpIdx));
- didElide = true;
- break;
- }
- }
- // Find if the arg has a direct dependency on the results of the lhs.
- for (int lhsResultIdx = 0; lhsResultIdx < lhs.getNumResults();
- ++lhsResultIdx) {
- if (rhsArgs[rhsOpIdx] == lhs.getResult(lhsResultIdx)) {
- // Direct dependency; can elide. We'll skip adding it to the new region
- // args and instead just remap it later.
- mapping.map(rhsBlock.getArgument(rhsOpIdx),
- lhsReturnValues[lhsResultIdx]);
- didElide = true;
- break;
- }
- }
- if (!didElide) {
- // Add to the lhs block.
- auto oldArg = rhs.getOperand(rhsOpIdx + 1);
- auto newArg = lhsBlock.addArgument(oldArg.getType());
- mapping.map(rhsBlock.getArgument(rhsOpIdx), newArg);
- newArgs.push_back(oldArg);
- }
- }
-
- OpBuilder regionBuilder = OpBuilder::atBlockEnd(&lhsBlock);
-
- // Copy ops (replacing any args as needed).
- // Note that we need to insert prior to the terminator.
- regionBuilder.setInsertionPoint(lhsReturnOp);
- for (auto &op : rhsBlock) {
- // Note that this updates the mapping with the new values (so at the end
- // we have those new values).
- //
- // We avoid the return op here as we have already merged it above.
- if (!op.hasTrait<OpTrait::IsTerminator>()) {
- regionBuilder.clone(op, mapping);
- }
- }
-
- // Compute new results and add to both region and return op.
- SmallVector<Value, 8> newResults;
- for (auto rhsResult : rhsReturnValues) {
- newResults.push_back(mapping.lookupOrDefault(rhsResult));
- }
- if (failed(appendReturnOperands(lhsReturnOp, newResults))) {
- return nullptr;
- }
- auto newRegionOp =
- appendRegionArgsAndResults(lhs, newArgs, newResults, rhs.getLoc());
-
- // Replace uses of original values with the new values.
- for (int i = 0; i < rhs.getNumResults(); ++i) {
- rhs.getResult(i).replaceAllUsesWith(
- newRegionOp.getResult(lhsReturnValues.size() + i));
- }
-
- // Remove rhs region.
- rhs.erase();
-
- // Remove results from the lhs that aren't used anymore as they may have been
- // elided when we merged as only the rhs was using them.
- newRegionOp = removeUnusedResults(newRegionOp);
-
- return newRegionOp;
-}
-
-// Merges multiple dispatch regions within a block into the same region,
-// if possible. Operations may be reordered if it's possible to merge more while
-// still obeying data dependencies.
-LogicalResult mergeBlockDispatchRegions(FuncOp func, Block *parentBlock) {
- LLVM_DEBUG(llvm::dbgs() << "+++ MERGING BLOCK DISPATCH REGIONS:\n");
- SmallVector<DispatchRegionOp, 8> mergableRegions;
- for (auto &op : *parentBlock) {
- if (auto regionOp = dyn_cast<DispatchRegionOp>(op)) {
- if (isDispatchRegionMergable(regionOp)) {
- LLVM_DEBUG(llvm::dbgs() << " -REGION MERGABLE-\n");
- mergableRegions.push_back(regionOp);
- } else {
- LLVM_DEBUG(llvm::dbgs() << " -REGION NOT MERGABLE-\n");
- }
- }
- }
- for (int i = 0; i < mergableRegions.size(); ++i) {
- if (!mergableRegions[i]) continue;
- auto &lhs = mergableRegions[i];
- for (int j = i + 1; j < mergableRegions.size(); ++j) {
- if (!mergableRegions[j]) continue;
- auto &rhs = mergableRegions[j];
- if (!areDispatchRegionWorkloadsCompatible(lhs, rhs) ||
- areDispatchRegionsTransitivelyDependent(lhs, rhs)) {
- LLVM_DEBUG(llvm::dbgs() << " -REGIONS INCOMPATIBLE-\n");
- continue;
- }
- if (!isDispatchRegionMergable(rhs)) {
- // TODO(b/134675461): support non-trivial control flow.
- LLVM_DEBUG(llvm::dbgs()
- << " -REGION CONTAINS NON-TRIVIAL CONTROL FLOW-\n");
- }
- if (rhsHasLeafOnlyOp(lhs, rhs)) {
- LLVM_DEBUG(llvm::dbgs() << " -RHS REGION HAS LEAF OP-\n");
- continue;
- }
- mergableRegions[i] = mergeDispatchRegions(lhs, rhs);
- if (!mergableRegions[i]) {
- return failure();
- }
- mergableRegions[j] = nullptr;
- --i; // Try again to see if there are subsequent regions to merge.
- LLVM_DEBUG(llvm::dbgs() << " -> MERGED REGIONS\n");
- break;
- }
- }
-
- LLVM_DEBUG(llvm::dbgs() << "--- MERGED BLOCK DISPATCH REGIONS\n");
- return success();
-}
-
-} // namespace
-
-// Identifies dispatch regions that have compatible workloads and folds them.
-// This relies on CSE having deduped workloads to simplify the logic to simply
-// looking for dispatch regions using the same values.
-class FoldCompatibleDispatchRegionsPass
- : public FoldCompatibleDispatchRegionsBase<
- FoldCompatibleDispatchRegionsPass> {
- public:
- void runOnOperation() override {
- FuncOp func = getOperation();
- for (auto &block : func) {
- if (failed(mergeBlockDispatchRegions(func, &block))) {
- return signalPassFailure();
- }
- }
- }
-};
-
-std::unique_ptr<OperationPass<FuncOp>>
-createFoldCompatibleDispatchRegionsPass() {
- return std::make_unique<FoldCompatibleDispatchRegionsPass>();
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp b/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp
deleted file mode 100644
index 033e3b9..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp
+++ /dev/null
@@ -1,436 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <algorithm>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOpUtils.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/WorkloadUtils.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Support/Debug.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/RegionUtils.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-struct DispatchableOp {
- OpDispatchPolicy::AnchorBenefit anchorBenefit;
- size_t index;
- Operation *op;
-
- bool operator<(const DispatchableOp &other) const {
- // Note inverted index: this is so that traversing a sorted list in
- // reverse yields a topological ordering for each anchorBenefit.
- return std::tie(anchorBenefit, other.index) <
- std::tie(other.anchorBenefit, index);
- }
-};
-
-struct DispatchRegion {
- DispatchRegionOp op;
- Operation *anchorOp;
-
- Block &getEntryBlock() { return op.body().front(); }
-
- static llvm::Optional<DispatchRegion> form(Operation *anchorOp) {
- auto loc = anchorOp->getLoc();
- if (anchorOp->getNumResults() < 1) {
- emitError(loc) << "dispatch anchor op must have at least one result: "
- << *anchorOp;
- return llvm::None;
- }
- Value result = anchorOp->getResult(0);
- Value workload = calculateWorkload(anchorOp, result);
- if (!workload) return llvm::None;
-
- OpBuilder builder(anchorOp->getContext());
- auto created =
- DispatchRegionOp::formFromAnchorOp(workload, anchorOp, builder);
- if (!created) return llvm::None;
- return DispatchRegion{created->first, created->second};
- }
-
- // After an op has been inlined into the dispatch region, adds the results of
- // the inlined op to the dispatch region's results and redirects any uses
- // outside of the dispatch region.
- void returnAndReplaceUses(Operation *origOp, Operation *inlinedOp) {
- // Extend the arity of the dispatch region.
- OpBuilder builder(op.getContext());
- llvm::SmallVector<Value, 4> addlResults(inlinedOp->getResults());
- origOp->replaceAllUsesWith(
- DispatchRegionOp::appendResults(op, addlResults, builder));
- }
-};
-
-// Clones and hoists any identity metadata ops from the operands and results
-// of the dispatch region back out into the surrounding block.
-// This function is not general purpose: it only knows how to undo sinking
-// done by dispatch region formation.
-void hoistDispatchRegionMetadataOps(DispatchRegion &dr,
- OpDispatchPolicy &policy) {
- BlockAndValueMapping mapping;
- Block &block = dr.getEntryBlock();
- for (unsigned i = 0, e = block.getNumArguments(); i < e; ++i) {
- mapping.map(block.getArgument(i), dr.op.args()[i]);
- }
-
- // Hoist metadata ops from the operand edge.
- for (auto it : llvm::enumerate(block.getArguments())) {
- auto &blockArg = it.value();
- for (auto &blockUse : blockArg.getUses()) {
- Operation *useOp = blockUse.getOwner();
- if (!policy.isIdentityMetadata(useOp) || useOp->getOperand(0) != blockArg)
- continue;
- OpBuilder builder(dr.op);
- Operation *newOp = builder.clone(*useOp, mapping);
- dr.op.argsMutable().slice(it.index(), 1).assign(newOp->getResult(0));
- }
- }
-
- // Hoist metadata ops from the result edge.
- // Since initial formation can only have a single block, this is safe.
- auto *terminator = block.getTerminator();
- for (auto it : llvm::enumerate(terminator->getOperands())) {
- Operation *defOp = it.value().getDefiningOp();
- if (!defOp || !policy.isIdentityMetadata(defOp)) continue;
- OpBuilder builder(dr.op.getContext());
- builder.setInsertionPointAfter(dr.op);
- Operation *newOp = builder.clone(*defOp, mapping);
- dr.op.getResult(it.index()).replaceAllUsesWith(newOp->getResult(0));
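- // Re-point the hoisted metadata op at the region result now that all other
- // uses have been redirected to the hoisted op.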
- newOp->setOperand(0, dr.op.getResult(it.index()));
- }
-}
-
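-// Collects into |ops| all ops in |block| that are dispatchable and have an
-// anchor benefit in (0, maxAnchorBenefit], skipping already-formed dispatch
-// regions and non-dispatchable ops.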
-void findDispatchableAnchorOps(Block &block, OpDispatchPolicy &policy,
- OpDispatchPolicy::AnchorBenefit maxAnchorBenefit,
- llvm::SmallVectorImpl<DispatchableOp> &ops) {
- for (auto it : llvm::enumerate(block.getOperations())) {
- Operation *op = &it.value();
- // Skip any already-formed dispatch regions and non-dispatchable ops.
- if (isa<DispatchRegionOp>(op)) continue;
- if (!policy.isDispatchable(op)) continue;
- OpDispatchPolicy::AnchorBenefit anchorBenefit = policy.getAnchorBenefit(op);
- if (anchorBenefit > maxAnchorBenefit || anchorBenefit <= 0) continue;
- ops.push_back({anchorBenefit, it.index(), op});
- }
-}
-
-// Maintains a worklist of operations that are potential fusion candidates.
-// By default, items are popped in inverse topological order. An operation
-// can only be added to a worklist once and later additions will be ignored.
-class FusionWorklist {
- public:
- FusionWorklist(Block *block, bool inverseTopological = true)
- : block(block), inverseTopological(inverseTopological) {}
-
- // Adds defining ops of operands to the worklist.
- void addOperandDefs(OperandRange operands) {
- for (Value operand : operands) {
- Operation *def = operand.getDefiningOp();
- if (!def) continue;
- if (def->getBlock() != block) continue;
- if (!isValidItem(def)) continue;
- if (!visited.insert(def).second) continue;
- worklist.push_back(def);
- dirty = true;
- }
- }
-
- // Adds uses.
- void addResultUses(ResultRange results) {
- for (auto result : results) {
- for (auto &use : result.getUses()) {
- Operation *def = use.getOwner();
- if (def->hasTrait<OpTrait::IsTerminator>()) continue;
- if (def->getBlock() != block) continue;
- if (!isValidItem(def)) continue;
- if (!visited.insert(def).second) continue;
- worklist.push_back(def);
- dirty = true;
- }
- }
- }
-
- // Pops the next operation or nullptr if empty.
- Operation *popNext() {
- if (worklist.empty()) return nullptr;
- if (dirty) sort();
- return worklist.pop_back_val();
- }
-
- private:
- bool isValidItem(Operation *op) {
- // Dispatch regions cannot be added to the worklist because they are
- // modified/deleted in place and can not be guaranteed valid for the
- // modified/deleted in place and cannot be guaranteed valid for the
- return !llvm::isa<DispatchRegionOp>(op);
- }
-
- // Sorts worklist items such that popNext() values pop in inverse
- // topological order.
- void sort() {
- if (inverseTopological) {
- llvm::sort(worklist, [](Operation *left, Operation *right) {
- return left->isBeforeInBlock(right);
- });
- } else {
- llvm::sort(worklist, [](Operation *left, Operation *right) {
- return right->isBeforeInBlock(left);
- });
- }
- }
-
- Block *block;
- llvm::SmallVector<Operation *, 4> worklist;
- llvm::SmallDenseSet<Operation *, 4> visited;
- bool inverseTopological;
- bool dirty = false;
-};
-
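-// Pulls dispatchable producers of the dispatch region's operands into the
-// region, cloning them into the region and erasing the originals once they
-// have no remaining uses.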
-LogicalResult fuseInputs(DispatchRegion &dispatchRegion,
- OpDispatchPolicy &policy) {
- LLVM_DEBUG(llvm::dbgs() << "++ FUSING INPUTS\n");
-
- FusionWorklist worklist(dispatchRegion.op.getOperation()->getBlock());
- worklist.addOperandDefs(dispatchRegion.op.getOperands());
-
- while (Operation *nextOp = worklist.popNext()) {
- if (!policy.isDispatchable(nextOp)) continue;
- auto action = policy.fuseInput(dispatchRegion.anchorOp, nextOp);
- LLVM_DEBUG(llvm::dbgs().indent(2));
- if (action == OpDispatchPolicy::FusionType::MOVE_INTO) {
- return nextOp->emitError() << "cannot fuse input with MOVE_INTO action";
- } else if (action == OpDispatchPolicy::FusionType::DISABLED) {
- LLVM_DEBUG(llvm::dbgs()
- << "- SKIP NON FUSABLE INPUT: " << *nextOp << "\n");
- continue;
- }
-
- // Always inline inputs at the top of the block. Since we are processing
- // the worklist in inverse topological order, this preserves the original
- // ordering.
- LLVM_DEBUG(llvm::dbgs() << "- FUSABLE INPUT(" << static_cast<int>(action)
- << "): " << *nextOp << "\n");
- OpBuilder builder(nextOp->getContext());
- auto *inlinedOp = dispatchRegion.op.inlineOp(nextOp, builder);
- if (!inlinedOp) {
- return failure();
- }
- worklist.addOperandDefs(nextOp->getOperands());
-
- // Erase the op if it has no uses. This keeps it from forming regions
- // that will be dce'd later (or getting in the way of the benefit
- // scheme). Note that dispatchable ops have no side effects, which
- // makes this simple check safe.
- // The dispatch region must be optimized to remove unused arguments
- // resulting from this fusion.
- if (nextOp->use_empty()) {
- nextOp->erase();
- }
- }
-
- return success();
-}
-
-// Inlining an op into a dispatch region makes the operands of the op the
-// operands of the dispatch region (if the operands aren't already defined in
-// the dispatch region). The dispatch region has to be moved to just after the
-// last operand definition for SSA use-def relationships to remain valid.
-static LogicalResult moveDispatchOp(DispatchRegionOp dispatchRegionOp,
- Operation *inlinedOp) {
- // Find the operation, latest in the block, that defines an operand of the
- // inlinedOp.
- Optional<Operation *> lastOperandDef = llvm::None;
- for (Value operand : inlinedOp->getOperands()) {
- if (Operation *definingOp = operand.getDefiningOp()) {
- if (!lastOperandDef ||
- lastOperandDef.getValue()->isBeforeInBlock(definingOp)) {
- lastOperandDef = definingOp;
- }
- }
- }
- // Check for values that are used in the region of the op but captured from
- // outside the region.
- llvm::SetVector<Value> capturedValues;
- getUsedValuesDefinedAbove(inlinedOp->getRegions(), capturedValues);
- for (Value capturedValue : capturedValues) {
- if (Operation *definingOp = capturedValue.getDefiningOp()) {
- if (!lastOperandDef ||
- lastOperandDef.getValue()->isBeforeInBlock(definingOp)) {
- lastOperandDef = definingOp;
- }
- }
- }
- // If the last operand def is already before the dispatch region, there is
- // nothing to do.
- if (!lastOperandDef ||
- lastOperandDef.getValue()->isBeforeInBlock(dispatchRegionOp)) {
- return success();
- }
-
- // The dispatch region needs to be moved after the lastOperandDef, but before
- // the first use.
- Optional<Operation *> firstUse = llvm::None;
- for (Operation *user : dispatchRegionOp.getOperation()->getUsers()) {
- if (!firstUse || user->isBeforeInBlock(*firstUse)) {
- firstUse = user;
- }
- }
- if (firstUse && firstUse.getValue()->isBeforeInBlock(*lastOperandDef))
- return failure();
- dispatchRegionOp.getOperation()->moveAfter(lastOperandDef.getValue());
- return success();
-}
-
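-// Moves dispatchable consumers of the dispatch region's results into the
-// region, extending the region's results and redirecting uses outside of the
-// region.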
-LogicalResult fuseOutputs(DispatchRegion &dispatchRegion,
- OpDispatchPolicy &policy) {
- LLVM_DEBUG(llvm::dbgs() << "++ FUSING OUTPUT\n");
-
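- // Visit candidate consumers in forward topological order so that a consumer
- // is only processed after any of its producers that are also being fused
- // have been moved into the region.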
- FusionWorklist worklist(dispatchRegion.op.getOperation()->getBlock(),
- /*inverseTopological=*/false);
- worklist.addResultUses(dispatchRegion.op.getResults());
-
- while (Operation *nextOp = worklist.popNext()) {
- if (!policy.isDispatchable(nextOp)) continue;
- auto action = policy.fuseOutput(dispatchRegion.anchorOp, nextOp);
- LLVM_DEBUG(llvm::dbgs().indent(2));
- if (action == OpDispatchPolicy::FusionType::DISABLED) {
- LLVM_DEBUG(llvm::dbgs()
-                 << "- SKIP NON FUSABLE OUTPUT: " << *nextOp << "\n");
- continue;
- }
- if (action != OpDispatchPolicy::FusionType::MOVE_INTO) {
- return nextOp->emitError()
- << "cannot fuse output except with MOVE_INTO action";
- }
- if (failed(moveDispatchOp(dispatchRegion.op, nextOp))) {
- LLVM_DEBUG(llvm::dbgs() << "- SKIP Fusion due to SSA use-def violation "
- << *nextOp << "\n");
- continue;
- }
- LLVM_DEBUG(llvm::dbgs() << "- FUSABLE OUTPUT(" << static_cast<int>(action)
- << "): " << *nextOp << "\n");
- // Since the op's results will be redirected to the region results, scan for
- // worklist items before changing the use-def chain.
- worklist.addResultUses(nextOp->getResults());
- OpBuilder builder(nextOp->getContext());
- auto *inlinedOp =
- dispatchRegion.op.inlineOp(nextOp, builder, /*positionAtEnd=*/true);
- if (!inlinedOp) {
- return failure();
- }
- dispatchRegion.returnAndReplaceUses(nextOp, inlinedOp);
- if (nextOp->use_empty()) {
- nextOp->erase();
- }
- }
-
- return success();
-}
-
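-// Greedily forms dispatch regions in |block|: repeatedly picks the remaining
-// dispatchable op with the highest anchor benefit, forms a region around it,
-// and then fuses compatible producers and consumers into that region.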
-LogicalResult processBlock(Block &block, OpDispatchPolicy &policy) {
- int maxAnchorBenefit =
- std::numeric_limits<OpDispatchPolicy::AnchorBenefit>::max();
- // Maps DispatchRegionOp to the anchor op.
- llvm::DenseMap<Operation *, Operation *> dispatchRegions;
- // Per iteration scratch.
- llvm::SmallVector<DispatchableOp, 10> dispatchableOps;
-
- // Loop backwards from high anchor benefit to low.
- for (;;) {
- dispatchableOps.clear();
- // Enumerate un-dispatched ops.
- findDispatchableAnchorOps(block, policy, maxAnchorBenefit, dispatchableOps);
- if (dispatchableOps.empty()) break;
- llvm::sort(dispatchableOps);
-
- // Traversing from back->front will produce ops in [anchorPriority, index]
- // order.
- auto &d = dispatchableOps.back();
- if (d.anchorBenefit <= 0) break;
- LLVM_DEBUG(llvm::dbgs() << "FORM DISPATCH REGION(" << d.index << ":"
- << d.anchorBenefit << "): " << *d.op << "\n");
- auto dispatchRegion = DispatchRegion::form(d.op);
- if (!dispatchRegion) return failure();
- dispatchRegions.insert(
- std::make_pair(dispatchRegion->op, dispatchRegion->anchorOp));
-
- // Fuse outputs prior to inputs, since they can yield more things to
- // evaluate for input fusion.
- if (failed(fuseOutputs(*dispatchRegion, policy))) return failure();
- if (failed(fuseInputs(*dispatchRegion, policy))) return failure();
-
- // Ensure all unused operands and results are dce'd.
- // Note that this may delete the op itself if it is unused.
- optimizeClosureOp(dispatchRegion->op);
- if (dispatchRegion->op) {
- hoistDispatchRegionMetadataOps(*dispatchRegion, policy);
- }
- }
- return success();
-}
-
-// Identifies dispatchable ops and moves them into dispatch regions.
-// Some ops, such as call, will be deferred until following passes.
-class IdentifyDispatchRegions2Pass
- : public IdentifyDispatchRegions2Base<IdentifyDispatchRegions2Pass> {
- public:
- void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<IREE::Flow::FlowDialect>();
- }
-
- void runOnOperation() override {
- // NOTE: we require the DispatchabilityAnalysisPass to have run first.
- auto dispatchability = getCachedParentAnalysis<Dispatchability>();
- FuncOp func = getOperation();
- if (!dispatchability.hasValue()) {
- func.emitError()
- << "dispatchability analysis not performed "
- "on module; run -iree-flow-dispatchability-analysis first";
- return signalPassFailure();
- }
-
- OpDispatchPolicy policy(*dispatchability);
- for (auto &block : getOperation()) {
- if (failed(processBlock(block, policy))) {
- return signalPassFailure();
- }
- }
- }
-};
-
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> createIdentifyDispatchRegions2Pass() {
- return std::make_unique<IdentifyDispatchRegions2Pass>();
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
deleted file mode 100644
index 54dffeb..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Shape/IR/Builders.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeTypes.h"
-#include "iree/compiler/Dialect/Shape/Utils/TypeConversion.h"
-#include "llvm/Support/Debug.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/Pass/Pass.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-
-using Shape::getShapeToPrimitiveTypeExpander;
-
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-static llvm::cl::opt<bool> traceDispatchTensors(
- "iree-flow-trace-dispatch-tensors",
- llvm::cl::desc("Trace input/output values for each dispatch function"),
- llvm::cl::init(false));
-
-// Converts a dispatch_region into a dispatch to the outlined region function.
-LogicalResult convertToDispatchOp(DispatchRegionOp regionOp,
- ExecutableOp executableOp,
- DispatchEntryOp entryPointOp,
- FuncOp outlinedFuncOp) {
- // Insert at the same place as the original region.
- OpBuilder builder(regionOp);
-
- // Perform shape to primitive type expansion.
- auto &typeExpander = getShapeToPrimitiveTypeExpander();
- SmallVector<Value, 4> origArgs(regionOp.args());
- SmallVector<Value, 4> newArgs;
- if (failed(typeExpander.expandSourceValuesToTarget(
- regionOp.getLoc(), origArgs, newArgs, builder))) {
- return failure();
- }
-
- auto getTensorTypeArgs = [](auto args) {
- SmallVector<Value, 4> res;
- for (auto arg : args) {
- if (arg.getType().template isa<TensorType>()) res.push_back(arg);
- }
- return res;
- };
- if (traceDispatchTensors) {
- std::string str = "Input for " + std::string(outlinedFuncOp.getName());
- builder.create<TensorTraceOp>(regionOp.getLoc(), builder.getStringAttr(str),
- getTensorTypeArgs(newArgs));
- }
-
- SmallVector<Value, 4> operandDynamicDims;
- for (auto operand : regionOp.args()) {
- if (operand.getType().isa<ShapedType>()) {
- operandDynamicDims.append(Shape::buildOrFindDynamicDimsForValue(
- regionOp.getLoc(), operand, builder));
- }
- }
- SmallVector<Value, 4> resultDynamicDims;
- for (auto result : regionOp.results()) {
- if (result.getType().isa<ShapedType>()) {
- resultDynamicDims.append(Shape::buildOrFindDynamicDimsForValue(
- regionOp.getLoc(), result, builder));
- }
- }
-
- // Create the dispatch op to the executable function.
- auto dispatchOp = builder.create<DispatchOp>(
- regionOp.getLoc(), entryPointOp, ValueRange{regionOp.workload()},
- outlinedFuncOp.getType().getResults(), resultDynamicDims, newArgs,
- operandDynamicDims, ArrayRef<int64_t>{});
-
- if (traceDispatchTensors) {
- std::string str = "Output for " + std::string(outlinedFuncOp.getName());
- builder.create<TensorTraceOp>(regionOp.getLoc(), builder.getStringAttr(str),
- getTensorTypeArgs(dispatchOp.getResults()));
- }
-
- // Replace uses of the existing results with the new results.
- for (int i = 0; i < regionOp.getNumResults(); ++i) {
- regionOp.getResult(i).replaceAllUsesWith(dispatchOp.getResult(i));
- }
-
- // Erase original region.
- regionOp.erase();
-
- return success();
-}
-
-// Converts a region body to a function.
-// The region entry block args and return terminators are used to derive the
-// function type.
-FuncOp createRegionFunction(Location loc, StringRef functionName,
- Region ®ion) {
- // Build function type matching 1:1 with the region signature.
- SmallVector<Type, 4> operandTypes;
- SmallVector<Type, 4> resultTypes;
- auto &entryBlock = region.front();
- for (auto &operand : entryBlock.getArguments()) {
- operandTypes.push_back(operand.getType());
- }
- for (auto &block : region.getBlocks()) {
- if (auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(block.back())) {
- resultTypes = llvm::to_vector<4>(returnOp.getOperandTypes());
- break;
- }
- }
-
- // Clone region into the function body.
- auto functionType =
- FunctionType::get(region.getContext(), operandTypes, resultTypes);
- auto funcOp = FuncOp::create(loc, functionName, functionType);
- BlockAndValueMapping mapping;
- region.cloneInto(&funcOp.getBody(), mapping);
-
- // Replace flow.return with std.return.
- for (auto &block : funcOp.getBlocks()) {
- if (auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(block.back())) {
- OpBuilder builder(returnOp);
- builder.create<mlir::ReturnOp>(
- returnOp.getLoc(), llvm::to_vector<4>(returnOp.getOperands()));
- returnOp.erase();
- }
- }
-
- // Remove any tie_shapes not from entry block args.
- // TODO(laurenzo): Remove this once we are not materializing ties in
- // dispatch regions at all. For now, this at least provides a better
- // contract to the backends without leaking the fact that dispatch
- // formation fully materializes ties.
- auto *newEntryBlock = &funcOp.getBlocks().front();
- funcOp.walk([&](Shape::TieShapeOp tieOp) {
- if (auto blockArg = tieOp.operand().dyn_cast<BlockArgument>()) {
- if (blockArg.getOwner() == newEntryBlock) return;
- }
- // Elide.
- tieOp.result().replaceAllUsesWith(tieOp.operand());
- tieOp.erase();
- });
-
- // Expand shape types to primitives.
- auto &typeExpander = getShapeToPrimitiveTypeExpander();
- OpBuilder expandBuilder(funcOp.getContext());
- if (failed(typeExpander.expandFunctionSignature(funcOp, expandBuilder)) ||
- failed(typeExpander.expandAllReturnLikeTerminators<mlir::ReturnOp>(
- funcOp, expandBuilder))) {
- return nullptr;
- }
-
- return funcOp;
-}
-
-// Outlines a dispatch region into a flow.executable.
-LogicalResult outlineDispatchRegion(
- DispatchRegionOp regionOp, int outlinedRegionOrdinal,
- llvm::StringMap<FuncOp> &dispatchableFuncOps) {
- // Create the dispatch function.
- auto parentFuncOp = regionOp->getParentOfType<FuncOp>();
- std::string namePrefix = parentFuncOp.getName().str() + "_ex_dispatch_" +
- std::to_string(outlinedRegionOrdinal);
-
- // Convert the region to a function.
- auto dispatchFuncOp =
- createRegionFunction(regionOp.getLoc(), namePrefix, regionOp.body());
- if (!dispatchFuncOp) {
- return failure();
- }
-
- // Create the executable with the region cloned into it.
- auto executableOp = createExecutable(
- regionOp.getLoc(), namePrefix, {dispatchFuncOp},
- parentFuncOp->getParentOfType<ModuleOp>(), dispatchableFuncOps);
- executableOp.getOperation()->moveBefore(parentFuncOp);
- executableOp.setPrivate();
-
- // Add dispatch export pointing at the function.
- OpBuilder builder(executableOp.body());
- auto entryPointOp = builder.create<DispatchEntryOp>(
- regionOp.getLoc(), builder.getStringAttr(dispatchFuncOp.getName()),
- builder.getSymbolRefAttr(dispatchFuncOp),
- TypeAttr::get(dispatchFuncOp.getType()),
- /*workgroup_rank=*/IntegerAttr{});
-
- // Finally convert the dispatch region into a dispatch to the outlined func.
- return convertToDispatchOp(regionOp, executableOp, entryPointOp,
- dispatchFuncOp);
-}
-
-} // namespace
-
-class OutlineDispatchRegionsPass
- : public OutlineDispatchRegionsBase<OutlineDispatchRegionsPass> {
- public:
- OutlineDispatchRegionsPass() = default;
-
- void runOnOperation() override {
- auto dispatchability = getCachedAnalysis<Dispatchability>();
- llvm::StringMap<FuncOp> dispatchableFuncOps;
- if (dispatchability.hasValue()) {
-      // If we do not get dispatchability from the cache, keep
-      // dispatchableFuncOps empty to stay compatible with the prior behavior.
- dispatchability.getValue().get().walkDispatchableOps([&](FuncOp funcOp) {
- dispatchableFuncOps[funcOp.getName()] = funcOp;
- });
- }
-
- // TODO(benvanik): replace with a pattern rewriter?
- auto funcOps = llvm::to_vector<32>(getOperation().getOps<FuncOp>());
- for (auto funcOp : funcOps) {
- // Outline all of the dispatch regions ops in this function.
- SmallVector<DispatchRegionOp, 8> dispatchRegionOps;
- funcOp.walk(
- [&](DispatchRegionOp op) { dispatchRegionOps.push_back(op); });
- for (int i = 0; i < dispatchRegionOps.size(); ++i) {
- if (failed(outlineDispatchRegion(dispatchRegionOps[i], i,
- dispatchableFuncOps))) {
- return signalPassFailure();
- }
- }
- }
- }
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegionsPass() {
- return std::make_unique<OutlineDispatchRegionsPass>();
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
index 779006f..e19401c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
@@ -14,11 +14,9 @@
#include <utility>
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
#include "iree/compiler/Dialect/Shape/IR/Builders.h"
#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
#include "iree/compiler/Dialect/Shape/IR/ShapeTypes.h"
@@ -39,6 +37,38 @@
namespace Flow {
namespace {
+// Creates a flow.executable out of a set of functions, pulling in all other
+// functions reachable by the provided functions.
+static ExecutableOp createExecutable(Location loc, StringRef executableName,
+ ArrayRef<FuncOp> funcOps,
+ ModuleOp parentModuleOp) {
+ assert(!funcOps.empty() && "must have at least one entry function");
+
+ // Create the executable that will contain the outlined region.
+ // NOTE: this will get uniquified if we have multiple in the same block.
+ OpBuilder parentModuleBuilder(&parentModuleOp.getBody()->back());
+ auto executableOp =
+ parentModuleBuilder.create<IREE::Flow::ExecutableOp>(loc, executableName);
+
+ // Create the inner ModuleOp that contains the original functions. We need
+ // to provide this shim as some ops (like std.call) look for the
+ // containing module to provide symbol resolution.
+ OpBuilder executableBuilder(executableOp);
+ executableBuilder.setInsertionPointToStart(&executableOp.getBlock());
+ auto innerModule = executableBuilder.create<ModuleOp>(loc);
+ for (auto funcOp : funcOps) {
+ innerModule.push_back(funcOp);
+ }
+
+ // Copy all reachable functions into the executable.
+ // Linker passes may dedupe these later on.
+ OpBuilder innerModuleBuilder = OpBuilder::atBlockEnd(innerModule.getBody());
+ innerModuleBuilder.setInsertionPoint(innerModule.getBody(),
+ ++innerModule.getBody()->begin());
+
+ return executableOp;
+}
+
// Converts a dispatch region op into a dispatch op to the outlined region.
static LogicalResult convertToDispatchOp(DispatchWorkgroupsOp regionOp,
ExecutableOp executableOp,
@@ -173,8 +203,7 @@
// Outlines a dispatch region into a flow.executable and replaces the region op
// with a dispatch to that outlined executable.
static LogicalResult outlineDispatchWorkgroupsOp(
- std::string namePrefix, DispatchWorkgroupsOp regionOp,
- llvm::StringMap<FuncOp> &dispatchableFuncOps) {
+ std::string namePrefix, DispatchWorkgroupsOp regionOp) {
// Convert the region to a free-floating function.
auto workgroupFuncOp =
createWorkgroupFunc(regionOp.getLoc(), namePrefix, regionOp.body());
@@ -184,9 +213,9 @@
// Create the executable with the region cloned into it.
auto parentFuncOp = regionOp->getParentOfType<FuncOp>();
- auto executableOp = createExecutable(
- regionOp.getLoc(), namePrefix, {workgroupFuncOp},
- parentFuncOp->getParentOfType<ModuleOp>(), dispatchableFuncOps);
+ auto executableOp =
+ createExecutable(regionOp.getLoc(), namePrefix, {workgroupFuncOp},
+ parentFuncOp->getParentOfType<ModuleOp>());
executableOp.getOperation()->moveBefore(parentFuncOp);
executableOp.setPrivate();
@@ -210,14 +239,6 @@
OutlineDispatchRegions2Pass() = default;
void runOnOperation() override {
- // Mark all functions that are dispatchable and can be moved into dispatch
- // executables when they are called. A dispatch region using a
- // non-dispatchable function is considered an error.
- auto &dispatchability = getAnalysis<Dispatchability>();
- llvm::StringMap<FuncOp> dispatchableFuncOps;
- dispatchability.walkDispatchableOps(
- [&](FuncOp funcOp) { dispatchableFuncOps[funcOp.getName()] = funcOp; });
-
// Convert each dispatch region into a flow.executable + dispatch op.
for (auto funcOp : getOperation().getOps<FuncOp>()) {
// Outline all of the dispatch regions ops in this function.
@@ -226,8 +247,8 @@
for (int i = 0; i < dispatchWorkgroupsOps.size(); ++i) {
std::string namePrefix =
funcOp.getName().str() + "_dispatch_" + std::to_string(i);
- if (failed(outlineDispatchWorkgroupsOp(
- namePrefix, dispatchWorkgroupsOps[i], dispatchableFuncOps))) {
+ if (failed(outlineDispatchWorkgroupsOp(namePrefix,
+ dispatchWorkgroupsOps[i]))) {
return signalPassFailure();
}
}
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index 4d9858f..214f864 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -73,9 +73,6 @@
static void buildHLOInputTransformPassPipeline(OpPassManager &passManager) {
passManager.addNestedPass<FuncOp>(
IREE::Flow::createHLOToHLOPreprocessingPass());
-  // TODO(ataei): This should run as part of createHLOToHLOPreprocessingPass,
-  // but doing so would break the VMLA backend.
- passManager.addNestedPass<FuncOp>(createDecomposeHLOClampPass());
// Run passes to remove shape constraints. HLO lowering inserts them, but they
// are not desired here.
@@ -104,8 +101,7 @@
});
}
-void buildFlowTransformPassPipeline(OpPassManager &passManager,
- bool dispatchLinalgOnTensors) {
+void buildFlowTransformPassPipeline(OpPassManager &passManager) {
//----------------------------------------------------------------------------
// Entry dialect cleanup
//----------------------------------------------------------------------------
@@ -202,51 +198,39 @@
IREE::Flow::createPrePartitioningConversionPass());
passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
- if (dispatchLinalgOnTensors) {
- // TODO(benvanik): move up to input; requires pre-partitioning conversion
- // to be reworked first.
- passManager.addNestedPass<FuncOp>(
- mlir::iree_compiler::createHLOToLinalgOnTensorsPass(true));
+ // TODO(benvanik): move up to input; requires pre-partitioning conversion
+ // to be reworked first.
+ passManager.addNestedPass<FuncOp>(
+ mlir::iree_compiler::createHLOToLinalgOnTensorsPass(true));
- if (clEnable1x1ConvToMatmul) {
- passManager.addNestedPass<FuncOp>(
- mlir::iree_compiler::createConvert1x1ConvToMatmulPass());
- }
- if (clEnableConvToImg2Col) {
- passManager.addNestedPass<FuncOp>(
- mlir::iree_compiler::createConvertConv2DToImg2ColPass());
- }
-
+ if (clEnable1x1ConvToMatmul) {
passManager.addNestedPass<FuncOp>(
- mlir::createConvertElementwiseToLinalgPass());
- passManager.addNestedPass<FuncOp>(
- mlir::createLinalgFoldUnitExtentDimsPass());
- passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
- passManager.addNestedPass<FuncOp>(
- mlir::iree_compiler::createFusionOfTensorOpsPass());
- passManager.addNestedPass<FuncOp>(
- IREE::Flow::createConvertToFlowTensorOpsPass());
- passManager.addNestedPass<FuncOp>(mlir::createCSEPass());
-
- passManager.addNestedPass<FuncOp>(
- IREE::Flow::createDispatchLinalgOnTensorsPass());
- // NOTE: required because the current dispatch-linalg-on-tensors pass
- // creates a lot of dead IR that needs to be cleaned up.
- passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
-
- // Outline the dispatch regions into their own functions wrapped in
- // executables.
- passManager.addPass(IREE::Flow::createOutlineDispatchRegions2Pass());
- } else {
- // DEPRECATED: legacy HLO-based path.
- passManager.addPass(IREE::Flow::createDispatchabilityAnalysisPass());
- passManager.addNestedPass<FuncOp>(
- IREE::Flow::createIdentifyDispatchRegions2Pass());
- passManager.addNestedPass<FuncOp>(createCSEPass());
- passManager.addNestedPass<FuncOp>(
- IREE::Flow::createFoldCompatibleDispatchRegionsPass());
- passManager.addPass(IREE::Flow::createOutlineDispatchRegionsPass());
+ mlir::iree_compiler::createConvert1x1ConvToMatmulPass());
}
+ if (clEnableConvToImg2Col) {
+ passManager.addNestedPass<FuncOp>(
+ mlir::iree_compiler::createConvertConv2DToImg2ColPass());
+ }
+
+ passManager.addNestedPass<FuncOp>(
+ mlir::createConvertElementwiseToLinalgPass());
+ passManager.addNestedPass<FuncOp>(mlir::createLinalgFoldUnitExtentDimsPass());
+ passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(
+ mlir::iree_compiler::createFusionOfTensorOpsPass());
+ passManager.addNestedPass<FuncOp>(
+ IREE::Flow::createConvertToFlowTensorOpsPass());
+ passManager.addNestedPass<FuncOp>(mlir::createCSEPass());
+
+ passManager.addNestedPass<FuncOp>(
+ IREE::Flow::createDispatchLinalgOnTensorsPass());
+ // NOTE: required because the current dispatch-linalg-on-tensors pass
+ // creates a lot of dead IR that needs to be cleaned up.
+ passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
+
+ // Outline the dispatch regions into their own functions wrapped in
+ // executables.
+ passManager.addPass(IREE::Flow::createOutlineDispatchRegions2Pass());
// Cleanup identity ops that clutter up the IR and canonicalize.
passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
@@ -310,13 +294,7 @@
"iree-flow-transformation-pipeline",
"Runs the full IREE flow dialect transformation pipeline",
[](OpPassManager &passManager) {
- buildFlowTransformPassPipeline(passManager, false);
- });
- PassPipelineRegistration<> tensorTransformPassPipeline(
- "iree-flow-transformation-pipeline-tensors",
- "Runs the full IREE flow dialect transformation pipeline",
- [](OpPassManager &passManager) {
- buildFlowTransformPassPipeline(passManager, true);
+ buildFlowTransformPassPipeline(passManager);
});
}
@@ -330,8 +308,8 @@
registerPasses();
// Pipelines.
- registerFlowTransformPassPipeline();
registerInputTransformPassPipeline();
+ registerFlowTransformPassPipeline();
}
} // namespace Flow
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h
index 2a64d7b..34df061 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h
@@ -52,8 +52,7 @@
// buildInputTransformPassPipeline
// buildFlowTransformPassPipeline
// <run conversion from flow to sequencer/hal/vm/etc>
-void buildFlowTransformPassPipeline(OpPassManager &passManager,
- bool dispatchLinalgOnTensors = true);
+void buildFlowTransformPassPipeline(OpPassManager &passManager);
void registerFlowTransformPassPipeline();
@@ -86,28 +85,14 @@
std::unique_ptr<OperationPass<ModuleOp>> createExpandVariableDynamicDimsPass();
//===----------------------------------------------------------------------===//
-// Dispatches (flow.dispatch.region)
+// Dispatches (flow.dispatch.workgroups)
//===----------------------------------------------------------------------===//
/// Pass to perform dispatch of Linalg on tensor ops by tiling and distribution.
/// A dispatch region is created for each tiled loop nest.
std::unique_ptr<OperationPass<FuncOp>> createDispatchLinalgOnTensorsPass();
-// Analyzes a module to identify which functions are dispatchable.
-// This information is cached on the module and is used by other FuncOp-scoped
-// passes to quickly access the module-level dispatchability information.
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityAnalysisPass();
-
-// Identifies dispatchable regions of functions and wraps them in
-// flow.dispatch_regions (version 2).
-std::unique_ptr<OperationPass<FuncOp>> createIdentifyDispatchRegions2Pass();
-
-// Folds multiple dispatch regions together that have compatible workloads.
-std::unique_ptr<OperationPass<FuncOp>>
-createFoldCompatibleDispatchRegionsPass();
-
// Outlines dispatch regions into executables.
-std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegionsPass();
std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegions2Pass();
// Injects tracing markers for dispatch operation tensor inputs and outputs.
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.td b/iree/compiler/Dialect/Flow/Transforms/Passes.td
index ad62531..a833e8a 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.td
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.td
@@ -29,12 +29,6 @@
let constructor = "mlir::iree_compiler::IREE::Flow::createDeduplicateExecutablesPass()";
}
-def DispatchabilityAnalysis :
- Pass<"iree-flow-dispatchability-analysis", "ModuleOp"> {
- let summary = "Analyzes functions to determine their dispatchability";
- let constructor = "mlir::iree_compiler::IREE::Flow::createDispatchabilityAnalysisPass()";
-}
-
def DispatchLinalgOnTensors :
Pass<"iree-flow-dispatch-linalg-on-tensors-pass", "FuncOp"> {
let summary = "Dispatch Linalg operations on tensors by using tile and distribute";
@@ -53,12 +47,6 @@
let constructor = "mlir::iree_compiler::IREE::Flow::createExportBenchmarkFuncsPass()";
}
-def FoldCompatibleDispatchRegions :
- Pass<"iree-flow-fold-compatible-dispatch-regions", "FuncOp"> {
- let summary = "Folds dispatch regions that have compatible workloads";
- let constructor = "mlir::iree_compiler::IREE::Flow::createFoldCompatibleDispatchRegionsPass()";
-}
-
def FormStreams :
Pass<"iree-flow-form-streams", "FuncOp"> {
let summary = "Identifies dispatches that can be grouped into streams within functions";
@@ -77,12 +65,6 @@
let constructor = "mlir::iree_compiler::IREE::Flow::createHLOToHLOPreprocessingPass()";
}
-def IdentifyDispatchRegions2 :
- Pass<"iree-flow-identify-dispatch-regions2", "FuncOp"> {
- let summary = "Conservatively identifies dispatch regions in functions (v2)";
- let constructor = "mlir::iree_compiler::IREE::Flow::createIdentifyDispatchRegions2Pass()";
-}
-
def InjectDispatchTracing :
Pass<"iree-flow-inject-dispatch-tracing", "FuncOp"> {
let summary = "Injects dispatch region tracing";
@@ -95,12 +77,6 @@
let constructor = "mlir::iree_compiler::IREE::Flow::createLegalizeInputTypesPass()";
}
-def OutlineDispatchRegions :
- Pass<"iree-flow-outline-dispatch-regions", "ModuleOp"> {
- let summary = "Outlines dispatch regions into standalone functions";
- let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineDispatchRegionsPass()";
-}
-
def OutlineDispatchRegions2 :
Pass<"iree-flow-outline-dispatch-regions2", "ModuleOp"> {
let summary = "Outlines dispatch regions into executables";
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/BUILD b/iree/compiler/Dialect/Flow/Transforms/test/BUILD
index 7ab73fb..a88fda9 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/BUILD
+++ b/iree/compiler/Dialect/Flow/Transforms/test/BUILD
@@ -32,17 +32,11 @@
"dispatch_linalg_on_tensors_fusion.mlir",
"expand_variable_dynamic_dims.mlir",
"export_benchmark_funcs.mlir",
- "fold_compatible_dispatch_regions.mlir",
"form_streams.mlir",
"hlo_to_hlo_preprocessing.mlir",
"hlo_to_hlo_preprocessing_canoncalize_dot_general.mlir",
"hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir",
"hoist_unstreamable_ops.mlir",
- "identify_dispatch_regions2_enable_matmul_fusion.mlir",
- "identify_dispatch_regions2_hlo.mlir",
- "identify_dispatch_regions2_linalg.mlir",
- "identify_dispatch_regions2_shapes.mlir",
- "identify_dispatch_regions2_std_fusion.mlir",
"inject_dispatch_tracing.mlir",
"legalize_input_types.mlir",
"outline_dispatch_regions2.mlir",
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
index adb9779..2fcfbb2 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
@@ -21,17 +21,11 @@
"dispatch_linalg_on_tensors_fusion.mlir"
"expand_variable_dynamic_dims.mlir"
"export_benchmark_funcs.mlir"
- "fold_compatible_dispatch_regions.mlir"
"form_streams.mlir"
"hlo_to_hlo_preprocessing.mlir"
"hlo_to_hlo_preprocessing_canoncalize_dot_general.mlir"
"hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir"
"hoist_unstreamable_ops.mlir"
- "identify_dispatch_regions2_enable_matmul_fusion.mlir"
- "identify_dispatch_regions2_hlo.mlir"
- "identify_dispatch_regions2_linalg.mlir"
- "identify_dispatch_regions2_shapes.mlir"
- "identify_dispatch_regions2_std_fusion.mlir"
"inject_dispatch_tracing.mlir"
"legalize_input_types.mlir"
"outline_dispatch_regions2.mlir"
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir b/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
index e1465bf..5d3545a 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
@@ -7,25 +7,6 @@
return %0, %1 : tensor<5x5xf32>, tensor<3x5xf32>
}
}
-// CHECK-DAG: flow.variable @[[IN0_0:.+]] dense<{{.*}}> : tensor<5x3xf32>
-// CHECK-DAG: flow.variable @[[IN0_1:.+]] dense<{{.*}}> : tensor<3x5xf32>
-// CHECK: func @two_dispatch_ex_dispatch_0_benchmark
-// CHECK: %[[RES:.+]] = flow.ex.stream.fragment() : () -> tensor<5x5xf32> =
-// CHECK-DAG: %{{.+}} = flow.variable.load @[[IN0_0]] : tensor<5x3xf32>
-// CHECK-DAG: %{{.+}} = flow.variable.load @[[IN0_1]] : tensor<3x5xf32>
-// CHECK: %[[DISPATCH_RES:.+]] = flow.dispatch @two_dispatch_ex_dispatch_0::@two_dispatch_ex_dispatch_0[%{{.+}}](%{{.+}}, %{{.+}}) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
-// CHECK: flow.return %[[DISPATCH_RES]] : tensor<5x5xf32>
-// CHECK: iree.do_not_optimize(%[[RES]]) : tensor<5x5xf32>
-
-// CHECK-DAG: flow.variable @[[IN1_0:.+]] dense<{{.*}}> : tensor<3x5xf32>
-// CHECK-DAG: flow.variable @[[IN1_1:.+]] dense<{{.*}}> : tensor<5x5xf32>
-// CHECK: func @two_dispatch_ex_dispatch_1_benchmark
-// CHECK: %[[RES:.+]] = flow.ex.stream.fragment() : () -> tensor<3x5xf32>
-// CHECK-DAG: %{{.+}} = flow.variable.load @[[IN1_0]] : tensor<3x5xf32>
-// CHECK-DAG: %{{.+}} = flow.variable.load @[[IN1_1]] : tensor<5x5xf32>
-// CHECK: %[[DISPATCH_RES:.+]] = flow.dispatch @two_dispatch_ex_dispatch_1::@two_dispatch_ex_dispatch_1[%{{.+}}](%{{.+}}, %{{.+}}) : (tensor<3x5xf32>, tensor<5x5xf32>) -> tensor<3x5xf32>
-// CHECK: flow.return %[[DISPATCH_RES]] : tensor<3x5xf32>
-// CHECK: iree.do_not_optimize(%[[RES]]) : tensor<3x5xf32>
// CHECK-DAG: flow.variable @[[MAIN_IN_0:.+]] dense<{{.*}}> : tensor<5x3xf32>
// CHECK-DAG: flow.variable @[[MAIN_IN_1:.+]] dense<{{.*}}> : tensor<3x5xf32>
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
deleted file mode 100644
index d7ec93e..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
+++ /dev/null
@@ -1,160 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-fold-compatible-dispatch-regions %s | IreeFileCheck %s
-
-func @noFolding(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %cst = constant 4 : index
- %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
- flow.return %1 : tensor<4xf32>
- }
- return %0 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @noFolding
-// CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 4 : index
-// CHECK-NEXT: %0 = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-// CHECK-NEXT: %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-// CHECK-NEXT: flow.return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4xf32>
-
-// -----
-
-func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %cst = constant 4 : index
- %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
- flow.return %1 : tensor<4xf32>
- }
- %2 = flow.dispatch.region[%cst : index](%arg2 = %arg0 : tensor<4xf32>, %arg3 = %0 : tensor<4xf32>) -> tensor<4xf32> {
- %3 = mhlo.subtract %arg3, %arg2 : tensor<4xf32>
- flow.return %3 : tensor<4xf32>
- }
- %4 = flow.dispatch.region[%cst : index](%arg4 = %arg0 : tensor<4xf32>, %arg5 = %2 : tensor<4xf32>) -> tensor<4xf32> {
- %5 = mhlo.multiply %arg4, %arg5 : tensor<4xf32>
- flow.return %5 : tensor<4xf32>
- }
- return %4 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @elementwiseOps
-// CHECK: %[[WORKLOAD0:.+]] = constant 4
-// CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-// CHECK-NEXT: %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-// CHECK-NEXT: %2 = mhlo.subtract %1, %arg1 : tensor<4xf32>
-// CHECK-NEXT: %3 = mhlo.multiply %arg1, %2 : tensor<4xf32>
-// CHECK-NEXT: flow.return %3 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK: return %[[R0]] : tensor<4xf32>
-
-// -----
-
-func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %cst = constant 16 : index
- %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
- flow.return %3 : tensor<4x4xf32>
- }
- %cst_0 = constant 16 : index
- %1 = flow.dispatch.region[%cst_0 : index](%arg1 = %0 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- flow.return %3 : tensor<4x4xf32>
- }
- %cst_1 = constant 16 : index
- %2 = flow.dispatch.region[%cst_1 : index](%arg1 = %1 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
- %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
- flow.return %3 : tensor<4x4xf32>
- }
- return %2 : tensor<4x4xf32>
-}
-
-// CHECK-LABEL: func @interleavedDot
-// CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT: %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
-// CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[WORKLOAD1:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region[%[[WORKLOAD1]] : index](%arg1 = %[[R0]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT: %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[WORKLOAD2:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R2:.+]] = flow.dispatch.region[%[[WORKLOAD2]] : index](%arg1 = %[[R1]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT: %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
-// CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[R2]] : tensor<4x4xf32>
-
-// -----
-
-module {
- flow.variable @var1 dense<1.000000e+00> : tensor<4xf32>
- flow.variable @var2 dense<2.000000e+00> : tensor<4xf32>
- func @notDominate() -> tensor<4xf32> {
- %c4 = constant 4 : index
- %0 = flow.variable.load @var1 : tensor<4xf32>
- %1 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>) -> tensor<4xf32> {
- %4 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- flow.return %4 : tensor<4xf32>
- }
- %2 = flow.variable.load @var2 : tensor<4xf32>
- %3 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>, %arg1 = %2 : tensor<4xf32>) -> tensor<4xf32> {
- %4 = mhlo.subtract %arg1, %arg0 : tensor<4xf32>
- flow.return %4 : tensor<4xf32>
- }
- return %3 : tensor<4xf32>
- }
-}
-// CHECK-LABEL: func @notDominate
-// CHECK: flow.dispatch.region
-// CHECK: flow.dispatch.region
-
-// -----
-
-module {
- flow.variable @var1 dense<1.000000e+00> : tensor<4xf32>
- flow.variable @var2 dense<2.000000e+00> : tensor<4xf32>
- func @dominate() -> tensor<4xf32> {
- %c4 = constant 4 : index
- %0 = flow.variable.load @var1 : tensor<4xf32>
- %1 = flow.variable.load @var2 : tensor<4xf32>
- %2 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>) -> tensor<4xf32> {
- %4 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- flow.return %4 : tensor<4xf32>
- }
- %3 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>, %arg1 = %1 : tensor<4xf32>) -> tensor<4xf32> {
- %4 = mhlo.subtract %arg1, %arg0 : tensor<4xf32>
- flow.return %4 : tensor<4xf32>
- }
- return %3 : tensor<4xf32>
- }
-}
-// CHECK-LABEL: func @dominate
-// CHECK: flow.dispatch.region
-// CHECK-NOT: flow.dispatch.region
-
-// -----
-
-module {
- func @torch_index_select_producer(%arg0: tensor<5x1x5xi32>,
- %arg1: tensor<2xi32>) -> tensor<2x1x5xi32> {
- %c10 = constant 0 : index
- %0 = flow.dispatch.region[%c10 : index](%arg2 = %arg0 : tensor<5x1x5xi32>,
- %arg3 = %arg1 : tensor<2xi32>) -> tensor<2x1x5xi32> {
- %1 = "mhlo.torch_index_select"(%arg2, %arg3) {
- dim = 0 : i64,
- batch_dims = 0 : i64
- } : (tensor<5x1x5xi32>, tensor<2xi32>) -> tensor<2x1x5xi32>
- flow.return %1 : tensor<2x1x5xi32>
- }
- %1 = flow.dispatch.region[%c10 : index](%arg2 = %0 : tensor<2x1x5xi32>) -> tensor<2x1x5xi32> {
- %2 = mhlo.add %arg2, %arg2 : tensor<2x1x5xi32>
- flow.return %2 : tensor<2x1x5xi32>
- }
- return %1 : tensor<2x1x5xi32>
- }
-}
-// CHECK-LABEL: func @torch_index_select_producer
-// CHECK: flow.dispatch.region
-// CHECK-NEXT: mhlo.torch_index_select
-// CHECK-NEXT: mhlo.add
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir
deleted file mode 100644
index ae87444..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir
+++ /dev/null
@@ -1,187 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 -iree-enable-consumer-only-fusion -canonicalize %s | IreeFileCheck %s
-
-func @simpleDotAddMul
- (%arg0 : tensor<16x32xf32>, %arg1 : tensor<32x48xf32>,
- %arg2 : tensor<16x48xf32>, %arg3 : tensor<16x48xf32>) -> tensor<16x48xf32> {
- %0 = "mhlo.dot"(%arg0, %arg1) :
- (tensor<16x32xf32>, tensor<32x48xf32>) -> tensor<16x48xf32>
- %1 = mhlo.add %0, %arg2 : tensor<16x48xf32>
- %2 = mhlo.multiply %1, %arg3 : tensor<16x48xf32>
- return %2 : tensor<16x48xf32>
-}
-// CHECK-LABEL: func @simpleDotAddMul
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<16x32xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x48xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 768
-// CHECK-NEXT: %[[RESULT:.+]] = flow.dispatch.region[%[[WORKLOAD]] : index]
-// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG1]]
-// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG2]]
-// CHECK-SAME: %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG3]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T1:.+]] = "mhlo.dot"(%[[ARG4]], %[[ARG5]])
-// CHECK-NEXT: %[[T2:.+]] = mhlo.add %[[T1]], %[[ARG6]]
-// CHECK-NEXT: %[[T3:.+]] = mhlo.multiply %[[T2]], %[[ARG7]]
-// CHECK-NEXT: flow.return %[[T3]]
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RESULT]]
-
-// -----
-
-func @twoDots
- (%arg0 : tensor<16x32xf32>, %arg1 : tensor<32x48xf32>,
- %arg2 : tensor<16x48xf32>, %arg3 : tensor<16x64xf32>,
- %arg4 : tensor<16x64xf32>) -> tensor<16x64xf32> {
- %0 = "mhlo.dot"(%arg0, %arg1) :
- (tensor<16x32xf32>, tensor<32x48xf32>) -> tensor<16x48xf32>
- %1 = mhlo.add %0, %arg2 : tensor<16x48xf32>
- %2 = "mhlo.dot"(%1, %arg3) :
- (tensor<16x48xf32>, tensor<16x64xf32>) -> tensor<16x64xf32>
- %3 = mhlo.multiply %2, %arg4 : tensor<16x64xf32>
- return %3 : tensor<16x64xf32>
-}
-// CHECK-LABEL: func @twoDots
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<16x32xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x48xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<16x64xf32>
-// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: tensor<16x64xf32>
-// CHECK-NEXT: %[[WORKLOAD1:.+]] = constant 1024
-// CHECK-NEXT: %[[WORKLOAD2:.+]] = constant 768
-// CHECK-NEXT: %[[RESULT1:.+]] = flow.dispatch.region[%[[WORKLOAD2]] : index]
-// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG1]]
-// CHECK-SAME: %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG2]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T1:.+]] = "mhlo.dot"(%[[ARG5]], %[[ARG6]])
-// CHECK-NEXT: %[[T2:.+]] = mhlo.add %[[T1]], %[[ARG7]]
-// CHECK-NEXT: flow.return %[[T2]]
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[RESULT2:.+]] = flow.dispatch.region[%[[WORKLOAD1]] : index]
-// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG3]]
-// CHECK-SAME: %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG4]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.dot"(%[[ARG5]], %[[ARG6]])
-// CHECK-NEXT: %[[T4:.+]] = mhlo.multiply %[[T3]], %[[ARG7]]
-// CHECK-NEXT: flow.return %[[T4]]
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RESULT2]]
-
-// -----
-
-func @moveDispatchOp
- (%arg0 : tensor<1x384x384xf32>, %arg1 : tensor<384x512xf32>,
- %arg2 : tensor<512xf32>) -> tensor<1x384x512xf32> {
- %0 = "mhlo.reshape"(%arg0) : (tensor<1x384x384xf32>) -> tensor<384x384xf32>
- %1 = "mhlo.dot"(%0, %arg1) :
- (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32>
- %2 = "mhlo.broadcast_in_dim"(%arg2)
- {broadcast_dimensions = dense<1> : tensor<1xi64>} :
- (tensor<512xf32>) -> tensor<384x512xf32>
- %3 = mhlo.add %1, %2 : tensor<384x512xf32>
- %4 = "mhlo.reshape"(%3) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
- return %4 : tensor<1x384x512xf32>
-}
-// CHECK-LABEL: func @moveDispatchOp
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<1x384x384xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<384x512xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<512xf32>
-// CHECK: %[[RESULT1:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG2]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T1:.+]] = "mhlo.broadcast_in_dim"(%[[ARG3]])
-// CHECK-NEXT: flow.return %[[T1]]
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[RESULT2:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG1]]
-// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T2:.+]] = "mhlo.reshape"(%[[ARG5]])
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.dot"(%[[T2]], %[[ARG3]])
-// CHECK-NEXT: %[[T4:.+]] = mhlo.add %[[T3]], %[[ARG4]]
-// CHECK-NEXT: %[[T5:.+]] = "mhlo.reshape"(%[[T4]])
-// CHECK-NEXT: flow.return %[[T5]]
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RESULT2]]
-
-// -----
-
-func @dot_fusion_with_different_shape
- (%arg0: tensor<384x512xf32>, %arg1: tensor<512x128xf32>,
- %arg2: tensor<384x128xf32>) -> tensor<4x384x32xf32> {
- %0 = "mhlo.dot"(%arg0, %arg1)
- : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
- %1 = mhlo.add %0, %arg2 : tensor<384x128xf32>
- %2 = "mhlo.reshape"(%1) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
- %3 = "mhlo.transpose"(%2) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>}
- : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
- %4 = "mhlo.reshape"(%3) : (tensor<1x4x384x32xf32>) -> tensor<4x384x32xf32>
- return %4 : tensor<4x384x32xf32>
-}
-
-// CHECK-LABEL: func @dot_fusion_with_different_shape
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<384x512xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<512x128xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<384x128xf32>
-// CHECK: %[[RESULT1:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]] = %[[ARG1]]
-// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG2]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T2:.+]] = "mhlo.dot"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[T3:.+]] = mhlo.add %[[T2]], %[[ARG5]]
-// CHECK-NEXT: %[[T4:.+]] = "mhlo.reshape"(%[[T3]])
-// CHECK-NEXT: flow.return %[[T4]]
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[RESULT2:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T2:.+]] = "mhlo.transpose"(%[[ARG3]])
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.reshape"(%[[T2]])
-// CHECK-NEXT: flow.return %[[T3]]
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RESULT2]]
-
-// -----
-
-func @dot_general_lower_swapped
- (%arg0 : tensor<2x3xf32>, %arg1 : tensor<1x1x2xf32>) -> tensor<3x1x1xf32> {
- %0 = "mhlo.transpose"(%arg0) {permutation = dense<[1, 0]> : tensor<2xi64>}
- : (tensor<2x3xf32>) -> tensor<3x2xf32>
- %1 = "mhlo.transpose"(%arg1) {permutation = dense<[2, 0, 1]> : tensor<3xi64>}
- : (tensor<1x1x2xf32>) -> tensor<2x1x1xf32>
- %2 = "mhlo.reshape"(%1) : (tensor<2x1x1xf32>) -> tensor<2x1xf32>
- %3 = "mhlo.dot"(%0, %2) {precision_config = ["DEFAULT", "DEFAULT"]}
- : (tensor<3x2xf32>, tensor<2x1xf32>) -> tensor<3x1xf32>
- %4 = "mhlo.reshape"(%3) : (tensor<3x1xf32>) -> tensor<3x1x1xf32>
- return %4 : tensor<3x1x1xf32>
-}
-// CHECK-LABEL: func @dot_general_lower_swapped
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x3xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1x1x2xf32>
-// CHECK: %[[RESULT1:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.transpose"(%[[ARG2]])
-// CHECK-NEXT: flow.return %[[T3]]
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[RESULT2:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]] = %[[ARG1]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.transpose"(%[[ARG2]])
-// CHECK-NEXT: flow.return %[[T3]]
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[RESULT3:.+]] = flow.dispatch.region
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]] = %[[RESULT2]]
-// CHECK-SAME: {
-// CHECK-NEXT: %[[T3:.+]] = "mhlo.reshape"(%[[ARG3]])
-// CHECK-NEXT: %[[T4:.+]] = "mhlo.dot"(%[[ARG2]], %[[T3]])
-// CHECK-NEXT: %[[T5:.+]] = "mhlo.reshape"(%[[T4]])
-// CHECK-NEXT: flow.return %[[T5]]
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RESULT3]]
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
deleted file mode 100644
index eb1e8e4..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
+++ /dev/null
@@ -1,165 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// CHECK-LABEL: @simpleMath
-func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4
- // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
- // CHECK-SAME: [%[[WORKLOAD]] : index]
- // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
- // CHECK-NEXT: %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- // CHECK-NEXT: flow.return %1 : tensor<4xf32>
- // CHECK-NEXT: }
- // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
- return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @isolatedDot
-func @isolatedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  // NOTE: Fragile ordering. Workload constants are emitted in order at the
- // top of the block.
- // CHECK: flow.dispatch.region
- // CHECK: mhlo.add
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.dot"
- // CHECK: flow.dispatch.region
- // CHECK: mhlo.multiply
- %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
- %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
- return %2 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @sameBenefit
-func @sameBenefit(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- // Because these are all the same benefit, initial formation puts them each
- // in their own region.
- // CHECK: flow.dispatch.region
- // CHECK: mhlo.add
- // CHECK: flow.dispatch.region
- // CHECK: call @callee
- // CHECK: flow.dispatch.region
- // CHECK: mhlo.multiply
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
- %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- // CHECK: %[[WORKLOAD0:.+]] = constant 4 : index
- // CHECK: %[[R0:.+]] = flow.dispatch.region
- // CHECK-SAME: [%[[WORKLOAD0]] : index]
- // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
- // CHECK-NEXT: %1 = mhlo.multiply %arg1, %arg1 : tensor<4xf32>
- %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
- // CHECK-NEXT: flow.return %1 : tensor<4xf32>
- // CHECK-NEXT: }
- // CHECK: return %[[R0]] : tensor<4xf32>
- return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @copyAdd
-func @copyAdd(%arg0 : tensor<4xf32>) -> tensor<4x16xf32> {
- // Because these are all the same benefit, initial formation puts them each
- // in their own region.
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.broadcast_in_dim"
- // CHECK-NEXT: mhlo.add
- %0 = "mhlo.broadcast_in_dim"(%arg0) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor<4xf32>) -> tensor<4x16xf32>
- %1 = mhlo.add %0, %0 : tensor<4x16xf32>
- return %1 : tensor<4x16xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @single_reduction
-func @single_reduction(%arg0 : tensor<4x8xf32>) -> tensor<4xf32> {
- // CHECK-DAG: %[[INITIAL:.+]] = constant dense<0.000000e+00>
- %0 = constant dense<0.000000e+00> : tensor<f32>
- // CHECK-DAG: %[[WORKLOAD0:.+]] = constant 4 : index
- // CHECK: %[[RESULT:.+]] = flow.dispatch.region
- // CHECK-SAME: [%[[WORKLOAD0]] : index]
- // CHECK-SAME: (%arg1 = %arg0 : tensor<4x8xf32>) -> (tensor<4xf32>)
- // CHECK-NEXT: %[[CST_0:.+]] = constant dense<0.0
- // CHECK-NEXT: = "mhlo.reduce"(%arg1, %[[CST_0]])
- %1 = "mhlo.reduce"(%arg0, %0) ( {
- ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
- %2 = mhlo.add %arg1, %arg2 : tensor<f32>
- "mhlo.return"(%2) : (tensor<f32>) -> ()
- }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
- // CHECK: flow.return
- // CHECK: return %[[RESULT]] : tensor<4xf32>
- return %1 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @multi_reduction
-func @multi_reduction(%arg0 : tensor<4x8xf32>, %arg1 : tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
- // CHECK-DAG: %[[INITIALA:.+]] = constant dense<0.000000e+00>
- %0 = constant dense<0.000000e+00> : tensor<f32>
- // CHECK-DAG: %[[INITIALB:.+]] = constant dense<1.000000e+00>
- %1 = constant dense<1.000000e+00> : tensor<f32>
- // CHECK-DAG: %[[WORKLOAD0:.+]] = constant 4 : index
- // CHECK: %[[RESULT:.+]]:2 = flow.dispatch.region
- // CHECK-SAME: [%[[WORKLOAD0]] : index]
- // CHECK-SAME: (%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>)
- // CHECK-NEXT: %[[CST_0:.+]] = constant dense<0.0
- // CHECK-NEXT: %[[CST_1:.+]] = constant dense<1.0
- // CHECK-NEXT: = "mhlo.reduce"(%arg2, %arg3, %[[CST_0]], %[[CST_1]])
- %2, %3 = "mhlo.reduce"(%arg0, %arg1, %0, %1) ( {
- ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
- %4 = mhlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
- %5 = mhlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
- "mhlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
- }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
- // CHECK: flow.return
- // CHECK: return %[[RESULT]]#0, %[[RESULT]]#1 : tensor<4xf32>, tensor<4xf32>
- return %2, %3 : tensor<4xf32>, tensor<4xf32>
-}
-
-// TODO(benvanik): windowed reduction.
-
-// -----
-
-// CHECK-LABEL: @clone_broadcast
-func @clone_broadcast(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
- %splatCst = constant dense<1.0> : tensor<f32>
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.broadcast"
- // CHECK: mhlo.add
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.dot"
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.broadcast"
- // CHECK: mhlo.add
- %0 = "mhlo.broadcast"(%splatCst) {broadcast_sizes = dense<[4, 4]> : tensor<2xi64>} : (tensor<f32>) -> tensor<4x4xf32>
- %1 = "mhlo.add"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- %2 = "mhlo.dot"(%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- %3 = "mhlo.add"(%0, %2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- return %3: tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @reshaped_dot
-func @reshaped_dot(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
- // CHECK: flow.dispatch.region
- // CHECK: "mhlo.reshape"
- // CHECK: "mhlo.reshape"
- // CHECK: "mhlo.dot"
- // CHECK: "mhlo.reshape"
- %0 = "mhlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- %1 = "mhlo.reshape"(%arg1) : (tensor<16xf32>) -> tensor<4x4xf32>
- %2 = "mhlo.dot"(%0, %1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
- %3 = "mhlo.reshape"(%2) : (tensor<4x4xf32>) -> tensor<16xf32>
- return %3 : tensor<16xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir
deleted file mode 100644
index 1290461..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-func @constant_capture(%arg0 : tensor<10x20xf32>) -> tensor<10x20xf32> {
- %cst1 = constant 1.0 : f32
- %cst2 = constant dense<2.0> : tensor<10x20xf32>
- %cst3 = constant dense<
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]> : tensor<10xf32>
- %0 = linalg.init_tensor [10, 20] : tensor<10x20xf32>
- %1 = linalg.generic
- {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0)>,
- affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%arg0, %cst2, %cst3
- : tensor<10x20xf32>, tensor<10x20xf32>, tensor<10xf32>)
- outs(%0 : tensor<10x20xf32>) {
- ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : f32, %arg4 : f32):
- %1 = addf %arg1, %cst1 : f32
- %2 = mulf %1, %arg2 : f32
- %3 = addf %2, %arg3 : f32
- linalg.yield %3 : f32
- } -> tensor<10x20xf32>
- return %1 : tensor<10x20xf32>
-}
-// CHECK: func @constant_capture
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<10x20xf32>
-// CHECK-DAG: %[[CST3:.+]] = constant dense<[1.000000e+00, 2.000000e+00,
-// CHECK-SAME: 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00,
-// CHECK-SAME: 7.000000e+00, 8.000000e+00, 9.000000e+00, 1.000000e+01]>
-// CHECK: %[[RESULT:.+]] = flow.dispatch.region[%{{.+}} : index](
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]] = %[[ARG0]]
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]] = %[[CST3]]
-// CHECK-SAME: ) -> (tensor<10x20xf32>) {
-// CHECK-DAG: %[[CST1:.+]] = constant 1.000000e+00 : f32
-// CHECK-DAG: %[[CST2:.+]] = constant dense<2.000000e+00> : tensor<10x20xf32>
-// CHECK-DAG: %[[T0:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf32>
-// CHECK: %[[RETURN:.+]] = linalg.generic
-// CHECK-SAME: ins(%[[ARG1]], %[[CST2]], %[[ARG2]]
-// CHECK-SAME: ) outs(%[[T0]] : tensor<10x20xf32>) {
-// CHECK-NEXT: ^{{[a-zA-Z0-9]+}}(
-// CHECK-SAME: %[[ARG3:.[a-zA-Z0-9_]+]]: f32,
-// CHECK-SAME: %[[ARG4:.[a-zA-Z0-9_]+]]: f32,
-// CHECK-SAME: %[[ARG5:.[a-zA-Z0-9_]+]]: f32,
-// CHECK-SAME: %[[ARG6:.[a-zA-Z0-9_]+]]: f32)
-// CHECK: %[[T0:.+]] = addf %[[ARG3]], %[[CST1]]
-// CHECK: %[[T1:.+]] = mulf %[[T0]], %[[ARG4]]
-// CHECK: %[[T2:.+]] = addf %[[T1]], %[[ARG5]]
-// CHECK: linalg.yield %[[T2]]
-// CHECK: }
-// CHECK: flow.return %[[RETURN]]
-// CHECK: }
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
deleted file mode 100644
index 4212eaf..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
+++ /dev/null
@@ -1,32 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// -----
-// CHECK-LABEL: @singleDispatchWithShapes
-// CHECK-SAME: %[[A0:[^:[:space:]]+]]: tensor<?x4xf32>,
-// CHECK-SAME: %[[A1:[^:[:space:]]+]]: !shapex.ranked_shape<[?,4]>,
-// CHECK-SAME: %[[A2:[^:[:space:]]+]]: !shapex.ranked_shape<[?,4]>
-func @singleDispatchWithShapes(%arg0 : tensor<?x4xf32>,
- %arg1 : !shapex.ranked_shape<[?,4]>, %arg2 : !shapex.ranked_shape<[?,4]>) -> tensor<?x4xf32> {
- // Lead-in tie_shape should be preserved outside of the dispatch region.
- // CHECK: %[[TS0:.+]] = shapex.tie_shape %[[A0]], %[[A1]]
- %0 = shapex.tie_shape %arg0, %arg1 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-  // Fragility: The order of CA? derives from the algorithm and is
-  // otherwise not load-bearing. Since it is on a single line, this is
-  // difficult to make generic.
- // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[UNUSED_WORKLOAD:.+]] : index](
- // CHECK-SAME: %[[CA2:.+]] = %[[A2]] : !shapex.ranked_shape<[?,4]>,
- // CHECK-SAME: %[[CA0:.+]] = %{{.+}} : tensor<?x4xf32>,
- // CHECK-SAME: %[[CA1:.+]] = %[[A1]] : !shapex.ranked_shape<[?,4]>)
- // Dispatch region should contain captured tie_shapes.
- // CHECK: %[[R1:.+]] = shapex.tie_shape %[[CA0]], %[[CA1]]
- // CHECK: %[[R2:.+]] = mhlo.add %[[R1]], %[[R1]]
- // CHECK: %[[R3:.+]] = shapex.tie_shape %[[R2]], %[[CA2]]
- // CHECK: flow.return %[[R3]]
- %1 = mhlo.add %0, %0 : tensor<?x4xf32>
- %2 = shapex.tie_shape %1, %arg2 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-
- // Lead-out tie_shape should be preserved outside of the dispatch region.
- // CHECK: %[[R4:.+]] = shapex.tie_shape %[[R0]], %[[A2]]
- // CHECK: return %[[R4]]
- return %2 : tensor<?x4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir
deleted file mode 100644
index b7219bd..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// CHECK-LABEL: @empty
-func @empty() {
- // CHECK-NEXT: return
- return
-}
-
-// -----
-
-// CHECK-LABEL: @stdElementwiseOps
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- // CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4
- // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
- // CHECK-SAME: [%[[WORKLOAD]] : index]
- // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
- // CHECK-NEXT: %1 = addf %arg1, %arg1 : tensor<4xf32>
- %0 = addf %arg0, %arg0 : tensor<4xf32>
- // CHECK-NEXT: flow.return %1 : tensor<4xf32>
- // CHECK-NEXT: }
- // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
- return %0 : tensor<4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
index 64c5f4b..1feda19 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
@@ -8,36 +8,6 @@
// -----
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
- %0 = addf %arg0, %arg0 : tensor<4xf32>
- %1 = subf %0, %arg0 : tensor<4xf32>
- %2 = mulf %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable @stdElementwiseOps_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @stdElementwiseOps_ex_dispatch_0
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @stdElementwiseOps_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %0 = addf %arg0, %arg0 : tensor<4xf32>
-// CHECK-NEXT: %1 = subf %0, %arg0 : tensor<4xf32>
-// CHECK-NEXT: %2 = mulf %1, %arg0 : tensor<4xf32>
-// CHECK-NEXT: return %2 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @stdElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
-// CHECK-NEXT: (%arg1: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4 : index
-// CHECK-NEXT: %1 = flow.dispatch @stdElementwiseOps_ex_dispatch_0::@stdElementwiseOps_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
-// CHECK-NEXT: flow.return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4xf32>
-// CHECK-NEXT: }
-
-// -----
-
func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
%0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
%1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
@@ -45,26 +15,24 @@
return %2 : tensor<4xf32>
}
-// CHECK-LABEL: flow.executable @hloElementwiseOps_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @hloElementwiseOps_ex_dispatch_0
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @hloElementwiseOps_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-// CHECK-NEXT: %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
-// CHECK-NEXT: %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
-// CHECK-NEXT: return %2 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
-// CHECK-NEXT: (%arg1: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4 : index
-// CHECK-NEXT: %1 = flow.dispatch @hloElementwiseOps_ex_dispatch_0::@hloElementwiseOps_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
-// CHECK-NEXT: flow.return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4xf32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @hloElementwiseOps_dispatch_0 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @hloElementwiseOps_dispatch_0
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @hloElementwiseOps_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<writeonly:4xf32>) {
+// CHECK: %{{.+}} = linalg.generic
+// CHECK: %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+// CHECK-NEXT: %{{.+}} = subf %{{.+}}, %{{.+}} : f32
+// CHECK-NEXT: %{{.+}} = mulf %{{.+}}, %{{.+}} : f32
+// CHECK: func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
+// CHECK-NEXT: (%arg1: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-DAG: %[[C1:.+]] = constant 1 : index
+// CHECK-DAG: %[[C4:.+]] = constant 4 : index
+// CHECK-NEXT: %1 = flow.dispatch @hloElementwiseOps_dispatch_0::@hloElementwiseOps_dispatch_0[%[[C4]], %[[C1]], %[[C1]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
+// CHECK-NEXT: flow.return %1 : tensor<4xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %0 : tensor<4xf32>
+// CHECK-NEXT: }
// -----
@@ -75,44 +43,35 @@
return %2 : tensor<4x4xf32>
}
-// CHECK-LABEL: flow.executable @interleavedDot_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @interleavedDot_ex_dispatch_0
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @interleavedDot_ex_dispatch_0(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT: %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
-// CHECK-NEXT: return %0 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: flow.executable @interleavedDot_ex_dispatch_1 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @interleavedDot_ex_dispatch_1
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @interleavedDot_ex_dispatch_1(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT: %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT: return %0 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: flow.executable @interleavedDot_ex_dispatch_2 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @interleavedDot_ex_dispatch_2
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @interleavedDot_ex_dispatch_2(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT: %0 = mhlo.multiply %arg0, %arg1 : tensor<4x4xf32>
-// CHECK-NEXT: return %0 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @interleavedDot(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x4xf32>) -> tensor<4x4xf32> =
-// CHECK-NEXT: (%arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 16 : index
-// CHECK-NEXT: %1 = flow.dispatch @interleavedDot_ex_dispatch_0::@interleavedDot_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT: %2 = flow.dispatch @interleavedDot_ex_dispatch_1::@interleavedDot_ex_dispatch_1[%[[WORKLOAD]]](%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT: %3 = flow.dispatch @interleavedDot_ex_dispatch_2::@interleavedDot_ex_dispatch_2[%[[WORKLOAD]]](%2, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4x4xf32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @interleavedDot_dispatch_0 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @interleavedDot_dispatch_0
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @interleavedDot_dispatch_0
+// CHECK: %{{.+}} = linalg.generic
+// CHECK: %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+// CHECK: flow.executable @interleavedDot_dispatch_1 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @interleavedDot_dispatch_1
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @interleavedDot_dispatch_1
+// CHECK: %{{.+}} = linalg.matmul
+// CHECK: flow.executable @interleavedDot_dispatch_2 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @interleavedDot_dispatch_2
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @interleavedDot_dispatch_2
+// CHECK: %{{.+}} = linalg.generic
+// CHECK: %{{.+}} = mulf %{{.+}}, %{{.+}} : f32
+// CHECK: func @interleavedDot(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x4xf32>) -> tensor<4x4xf32> =
+// CHECK-NEXT: (%arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
+// CHECK-DAG: %[[C1:.+]] = constant 1 : index
+// CHECK-DAG: %[[C4:.+]] = constant 4 : index
+// CHECK-NEXT: %1 = flow.dispatch @interleavedDot_dispatch_0::@interleavedDot_dispatch_0[%[[C4]], %[[C4]], %[[C1]]](%arg1) : (tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT: %2 = flow.dispatch @interleavedDot_dispatch_1::@interleavedDot_dispatch_1[%[[C4]], %[[C4]], %[[C1]]](%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT: %3 = flow.dispatch @interleavedDot_dispatch_2::@interleavedDot_dispatch_2[%[[C4]], %[[C4]], %[[C1]]](%2, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %0 : tensor<4x4xf32>
+// CHECK-NEXT: }
// -----
@@ -126,25 +85,18 @@
return %1 : tensor<4xf32>
}
-// CHECK-LABEL: flow.executable @reduction_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @reduction_ex_dispatch_0
+// CHECK-LABEL: flow.executable @reduction_dispatch_0 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @reduction_dispatch_0
// CHECK-NEXT: module {
-// CHECK-NEXT: func @reduction_ex_dispatch_0(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %cst = constant dense<0.000000e+00> : tensor<f32>
-// CHECK-NEXT: %0 = "mhlo.reduce"(%arg0, %cst) ( {
-// CHECK-NEXT: ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
-// CHECK-NEXT: %1 = mhlo.add %arg1, %arg2 : tensor<f32>
-// CHECK-NEXT: "mhlo.return"(%1) : (tensor<f32>) -> ()
-// CHECK-NEXT: }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: return %0 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
+// CHECK-NEXT: func @reduction_dispatch_0
+// CHECK: %{{.+}} = linalg.generic
+// CHECK: %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+// CHECK: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x8xf32>) -> tensor<4xf32> =
// CHECK-NEXT: (%arg1: tensor<4x8xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4 : index
-// CHECK-NEXT: %1 = flow.dispatch @reduction_ex_dispatch_0::@reduction_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4x8xf32>) -> tensor<4xf32>
+// CHECK-DAG: %[[C1:.+]] = constant 1 : index
+// CHECK-DAG: %[[C4:.+]] = constant 4 : index
+// CHECK-NEXT: %1 = flow.dispatch @reduction_dispatch_0::@reduction_dispatch_0[%[[C4]], %[[C1]], %[[C1]]](%arg1) : (tensor<4x8xf32>) -> tensor<4xf32>
// CHECK-NEXT: flow.return %1 : tensor<4xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %0 : tensor<4xf32>
@@ -158,27 +110,26 @@
return %1 : tensor<2x4xi32>
}
-// CHECK-LABEL: flow.executable @dynamicUpdateSlice_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @dynamicUpdateSlice_ex_dispatch_0
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @dynamicUpdateSlice_ex_dispatch_0(%arg0: tensor<2x4xi32>, %arg1: tensor<2x4xi32>) -> tensor<2x4xi32> {
-// CHECK-NEXT: %0 = mhlo.add %arg0, %arg1 : tensor<2x4xi32>
-// CHECK-NEXT: return %0 : tensor<2x4xi32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
-// CHECK-DAG: %[[ARG2_LOAD:.+]] = flow.tensor.load %arg2 : tensor<i32>
-// CHECK-DAG: %[[ARG2_INDEX:.+]] = index_cast %[[ARG2_LOAD]] : i32 to index
-// CHECK-DAG: %[[ARG3_LOAD:.+]] = flow.tensor.load %arg3 : tensor<i32>
-// CHECK-DAG: %[[ARG3_INDEX:.+]] = index_cast %[[ARG3_LOAD]] : i32 to index
-// CHECK-NEXT: %[[RET:.+]] = flow.ex.stream.fragment(%arg0, %[[ARG2_INDEX]], %[[ARG3_INDEX]], %arg1) : (tensor<2x4xi32>, index, index, tensor<1x1xi32>) -> tensor<2x4xi32> =
-// CHECK-NEXT: (%arg4: tensor<2x4xi32>, %arg5: index, %arg6: index, %arg7: tensor<1x1xi32>) -> tensor<2x4xi32> {
-// CHECK-NEXT: %[[WORKLOAD:.+]] = constant 8 : index
-// CHECK-NEXT: %[[ARG4_CLONE:.+]] = flow.tensor.clone %arg4 : tensor<2x4xi32>
-// CHECK-NEXT: %[[T0:.+]] = flow.tensor.update %arg7, %arg4[%arg5, %arg6] : tensor<1x1xi32> -> tensor<2x4xi32>
-// CHECK-NEXT: %[[T1:.+]] = flow.dispatch @dynamicUpdateSlice_ex_dispatch_0::@dynamicUpdateSlice_ex_dispatch_0[%[[WORKLOAD]]](%[[ARG4_CLONE]], %[[T0]]) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32>
-// CHECK-NEXT: flow.return %[[T1]] : tensor<2x4xi32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[RET]] : tensor<2x4xi32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @dynamicUpdateSlice_dispatch_0 attributes {sym_visibility = "private"} {
+// CHECK-NEXT: flow.dispatch.entry @dynamicUpdateSlice_dispatch_0
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @dynamicUpdateSlice_dispatch_0
+// CHECK: %{{.+}} = linalg.generic
+// CHECK: %{{.+}} = addi %{{.+}}, %{{.+}} : i32
+// CHECK: func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
+// CHECK-DAG: %[[ARG2_LOAD:.+]] = flow.tensor.load %arg2 : tensor<i32>
+// CHECK-DAG: %[[ARG2_INDEX:.+]] = index_cast %[[ARG2_LOAD]] : i32 to index
+// CHECK-DAG: %[[ARG3_LOAD:.+]] = flow.tensor.load %arg3 : tensor<i32>
+// CHECK-DAG: %[[ARG3_INDEX:.+]] = index_cast %[[ARG3_LOAD]] : i32 to index
+// CHECK-NEXT: %[[RET:.+]] = flow.ex.stream.fragment(%arg0, %[[ARG2_INDEX]], %[[ARG3_INDEX]], %arg1) : (tensor<2x4xi32>, index, index, tensor<1x1xi32>) -> tensor<2x4xi32> =
+// CHECK-NEXT: (%arg4: tensor<2x4xi32>, %arg5: index, %arg6: index, %arg7: tensor<1x1xi32>) -> tensor<2x4xi32> {
+// CHECK-DAG: %[[C1:.+]] = constant 1 : index
+// CHECK-DAG: %[[C2:.+]] = constant 2 : index
+// CHECK-DAG: %[[C4:.+]] = constant 4 : index
+// CHECK-NEXT: %[[ARG4_CLONE:.+]] = flow.tensor.clone %arg4 : tensor<2x4xi32>
+// CHECK-NEXT: %[[T0:.+]] = flow.tensor.update %arg7, %arg4[%arg5, %arg6] : tensor<1x1xi32> -> tensor<2x4xi32>
+// CHECK-NEXT: %[[T1:.+]] = flow.dispatch @dynamicUpdateSlice_dispatch_0::@dynamicUpdateSlice_dispatch_0[%[[C4]], %[[C2]], %[[C1]]](%[[ARG4_CLONE]], %[[T0]]) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32>
+// CHECK-NEXT: flow.return %[[T1]] : tensor<2x4xi32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[RET]] : tensor<2x4xi32>
+// CHECK-NEXT: }
diff --git a/iree/compiler/Dialect/Flow/Utils/BUILD b/iree/compiler/Dialect/Flow/Utils/BUILD
index a15ec70..a8260f8 100644
--- a/iree/compiler/Dialect/Flow/Utils/BUILD
+++ b/iree/compiler/Dialect/Flow/Utils/BUILD
@@ -21,22 +21,16 @@
cc_library(
name = "Utils",
srcs = [
- "DispatchUtils.cpp",
"WorkloadUtils.cpp",
],
hdrs = [
- "DispatchUtils.h",
"WorkloadUtils.h",
],
deps = [
- "//iree/compiler/Dialect/Flow/IR",
"//iree/compiler/Dialect/Shape/IR",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
- "@llvm-project//mlir:LinalgOps",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
- "@llvm-project//mlir:TosaDialect",
- "@mlir-hlo//:hlo",
],
)
diff --git a/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt b/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
index 47c410c..28b7f70 100644
--- a/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
@@ -14,21 +14,15 @@
NAME
Utils
HDRS
- "DispatchUtils.h"
"WorkloadUtils.h"
SRCS
- "DispatchUtils.cpp"
"WorkloadUtils.cpp"
DEPS
LLVMSupport
MLIRIR
- MLIRLinalg
MLIRStandard
MLIRSupport
- MLIRTosa
- iree::compiler::Dialect::Flow::IR
iree::compiler::Dialect::Shape::IR
- tensorflow::mlir_hlo
PUBLIC
)
diff --git a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
deleted file mode 100644
index 8c13cdb..0000000
--- a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeDialect.h"
-#include "llvm/ADT/SetVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-bool isOpOfKnownDialect(Operation *op) {
- if (!op->getDialect()) return false;
- // TODO(benvanik): replace with op dispatchability interface to allow dialects
- // to opt into dispatch.
- auto dialectNamespace = op->getDialect()->getNamespace();
- return dialectNamespace == FlowDialect::getDialectNamespace() ||
- dialectNamespace == linalg::LinalgDialect::getDialectNamespace() ||
- dialectNamespace == mhlo::MhloDialect::getDialectNamespace() ||
- dialectNamespace == mlir::StandardOpsDialect::getDialectNamespace() ||
- dialectNamespace == ShapeDialect::getDialectNamespace() ||
- dialectNamespace == tosa::TosaDialect::getDialectNamespace();
-}
-
-namespace {
-
-// Returns the set of values that must be captured for use by |ops| and the
-// set of values defined by |ops| that are used outside of the set.
-LogicalResult analyzeOpRangeValues(ArrayRef<Operation *> ops,
- llvm::SetVector<Value> *capturedValues,
- llvm::SetVector<Value> *escapingValues) {
- llvm::SmallDenseSet<Operation *> opSet;
- opSet.reserve(ops.size());
- opSet.insert(ops.begin(), ops.end());
- for (auto *op : ops) {
- for (auto value : op->getOperands()) {
- if (!llvm::is_contained(opSet, value.getDefiningOp())) {
- // Op is using a value not in the ops set, ensure we capture it.
- capturedValues->insert(value);
- }
- }
- for (auto value : op->getResults()) {
- for (auto &use : value.getUses()) {
- if (!llvm::is_contained(opSet, use.getOwner())) {
- // An op outside of the ops set is using the value, needs to escape.
- escapingValues->insert(value);
- continue;
- }
- }
- }
- }
- return success();
-}
-
-} // namespace
-
-LogicalResult buildDispatchRegion(Block *parentBlock, Value workload,
- ArrayRef<Operation *> ops) {
- // Fused location with all ops.
- SmallVector<Location, 16> opLocs;
- for (auto *op : ops) {
- opLocs.push_back(op->getLoc());
- }
- auto regionLoc = FusedLoc::get(workload.getContext(), opLocs);
-
- // Get a list of values that we need to capture and values that escape the
- // region and need to be returned.
- llvm::SetVector<Value> capturedValues;
- llvm::SetVector<Value> escapingValues;
- if (failed(analyzeOpRangeValues(ops, &capturedValues, &escapingValues))) {
- return failure();
- }
- SmallVector<Type, 8> escapingTypes;
- for (auto value : escapingValues) escapingTypes.push_back(value.getType());
-
- // Build the region op and add it to the parent block.
- OpBuilder parentBuilder = OpBuilder::atBlockEnd(parentBlock);
- parentBuilder.setInsertionPoint(ops.back());
- auto dispatchRegionOp = parentBuilder.create<IREE::Flow::DispatchRegionOp>(
- regionLoc, escapingTypes, workload, capturedValues.getArrayRef());
-
- // Create the block and setup the arg mapping for captured values.
- auto *regionBlock = new Block();
- dispatchRegionOp.body().push_back(regionBlock);
- OpBuilder regionBuilder = OpBuilder::atBlockEnd(regionBlock);
- BlockAndValueMapping mapping;
- for (auto capturedValue : capturedValues) {
- auto blockArg = regionBlock->addArgument(capturedValue.getType());
- mapping.map(capturedValue, blockArg);
- }
-
- // Clone ops into the new region block.
- for (auto *op : ops) {
- // Note that this updates the mapping with the new values (so at the end
- // we have those new values).
- regionBuilder.clone(*op, mapping);
- }
-
- // Return results (as we need a terminator in our block).
- // These are all of the values that escape our region.
- SmallVector<Value, 8> resultValues;
- for (auto oldValue : escapingValues) {
- resultValues.push_back(mapping.lookupOrDefault(oldValue));
- }
- regionBuilder.create<IREE::Flow::ReturnOp>(opLocs.back(), resultValues);
-
- // Replace usage of values with the results of the region.
- for (int i = 0; i < escapingValues.size(); ++i) {
- escapingValues[i].replaceAllUsesWith(dispatchRegionOp.getResult(i));
- }
-
- // Remove original ops from the parent region.
- for (auto it = ops.rbegin(); it != ops.rend(); ++it) {
- (*it)->erase();
- }
-
- return success();
-}
-
-namespace {
-
-// Recursively finds all reachable functions from the given |rootFunc| and adds
-// them to the |reachableFuncs| set.
-//
-// Note that indirect calls are not supported, however we don't allow those in
-// dispatch regions anyway so they should not be present here.
-LogicalResult findReachableFunctions(
- FuncOp rootFuncOp, llvm::SetVector<FuncOp> &reachableFuncs,
- llvm::StringMap<FuncOp> &dispatchableFuncOps) {
- llvm::SetVector<FuncOp> worklist;
- worklist.insert(rootFuncOp);
- while (!worklist.empty()) {
- auto funcOp = worklist.pop_back_val();
- funcOp.walk([&](CallOp callOp) {
- auto calleeOp = dispatchableFuncOps.find(callOp.callee())->second;
- if (reachableFuncs.insert(calleeOp)) {
- worklist.insert(calleeOp);
- }
- });
- }
- return success();
-}
-
-} // namespace
-
-ExecutableOp createExecutable(Location loc, StringRef executableName,
- ArrayRef<FuncOp> funcOps, ModuleOp parentModuleOp,
- llvm::StringMap<FuncOp> &dispatchableFuncOps) {
- assert(!funcOps.empty() && "must have at least one entry function");
-
- // Gather all reachable functions.
- llvm::SetVector<FuncOp> reachableFuncs;
- for (auto funcOp : funcOps) {
- (void)findReachableFunctions(funcOp, reachableFuncs, dispatchableFuncOps);
- }
-
- // Create the executable that will contain the outlined region.
- // NOTE: this will get uniquified if we have multiple in the same block.
- OpBuilder parentModuleBuilder(&parentModuleOp.getBody()->back());
- auto executableOp =
- parentModuleBuilder.create<IREE::Flow::ExecutableOp>(loc, executableName);
-
- // Create the inner ModuleOp that contains the original functions. We need
- // to provide this shim as some ops (like std.call) look for the
- // containing module to provide symbol resolution.
- OpBuilder executableBuilder(executableOp);
- executableBuilder.setInsertionPointToStart(&executableOp.getBlock());
- auto innerModule = executableBuilder.create<ModuleOp>(loc);
- for (auto funcOp : funcOps) {
- innerModule.push_back(funcOp);
- }
-
- // Copy all reachable functions into the executable.
- // Linker passes may dedupe these later on.
- OpBuilder innerModuleBuilder = OpBuilder::atBlockEnd(innerModule.getBody());
- innerModuleBuilder.setInsertionPoint(innerModule.getBody(),
- ++innerModule.getBody()->begin());
- for (auto reachableFunc : reachableFuncs) {
- innerModuleBuilder.clone(*reachableFunc);
- }
-
- return executableOp;
-}
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h
deleted file mode 100644
index 3a53416..0000000
--- a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Utilities for dispatch region and function manipulation.
-// These are shared between all dispatchable types such as the standard
-// dispatch region as well as dispatch-related types like reduction region.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
-#define IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "mlir/IR/Block.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/IR/Value.h"
-#include "mlir/Support/LogicalResult.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-// Returns true if we know about this dialect and have special dispatchability
-// information about it.
-bool isOpOfKnownDialect(Operation *op);
-
-// Builds a new dispatch region with the given |ops|.
-// The region will capture all required values and return all values used
-// outside of the |ops| provided. The region will be inserted at the location of
-// the last operation in the set.
-//
-// All |ops| must be compatible with the |workload| specified as they will all
-// be dispatched with the same workgroup structure.
-// TODO(benvanik): ensure we want to insert at end. Maybe front?
-LogicalResult buildDispatchRegion(Block *parentBlock, Value workload,
- ArrayRef<Operation *> ops);
-
-// Creates a flow.executable out of a set of functions, pulling in all other
-// functions reachable by the provided functions.
-ExecutableOp createExecutable(Location loc, StringRef executableName,
- ArrayRef<FuncOp> funcOps, ModuleOp parentModuleOp,
- llvm::StringMap<FuncOp> &dispatchableFuncOps);
-
-} // namespace Flow
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
-
-#endif // IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
diff --git a/iree/compiler/Dialect/HAL/Transforms/BUILD b/iree/compiler/Dialect/HAL/Transforms/BUILD
index 43832e6..99247a5 100644
--- a/iree/compiler/Dialect/HAL/Transforms/BUILD
+++ b/iree/compiler/Dialect/HAL/Transforms/BUILD
@@ -28,7 +28,6 @@
"InlineDeviceSwitches.cpp",
"LinkExecutables.cpp",
"MaterializeConstantPoolBuffers.cpp",
- "MaterializeInterfaces.cpp",
"MaterializeInterfaces2.cpp",
"MaterializeResourceCaches.cpp",
"MemoizeDeviceQueries.cpp",
diff --git a/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt b/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
index 86850b3..975f903 100644
--- a/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
@@ -23,7 +23,6 @@
"InlineDeviceSwitches.cpp"
"LinkExecutables.cpp"
"MaterializeConstantPoolBuffers.cpp"
- "MaterializeInterfaces.cpp"
"MaterializeInterfaces2.cpp"
"MaterializeResourceCaches.cpp"
"MemoizeDeviceQueries.cpp"
diff --git a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
deleted file mode 100644
index 57f5a8e..0000000
--- a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <memory>
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/HAL/Target/TargetBackend.h"
-#include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
-#include "iree/compiler/Dialect/HAL/Transforms/Passes.h"
-#include "iree/compiler/Dialect/HAL/Utils/TypeUtils.h"
-#include "llvm/ADT/StringSet.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace HAL {
-
-// Adds IO ops (such as hal.io.binding) and updates function signatures to use
-// them for their IO. We do this in a target-independent manner today so that we
-// can share the same descriptor set logic and parameter population code on the
-// scheduling side. In the future we could allow backends to opt into different
-// behavior.
-static llvm::Optional<IREE::HAL::InterfaceOp> declareInterfaceIO(
- IREE::Flow::ExecutableOp sourceOp, IREE::HAL::ExecutableOp targetOp) {
- auto moduleOp = sourceOp.getInnerModule();
- OpBuilder executableBuilder(targetOp.getContext());
- executableBuilder.setInsertionPointToStart(&targetOp.getBlock());
-
- // NOTE: we assume right now that all entry points have the same signature.
- SmallVector<FuncOp, 1> entryFuncOps;
- SmallVector<Location, 1> entryLocs;
- for (auto &op : sourceOp.getBlock()) {
- if (auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
- auto funcOp =
- moduleOp.lookupSymbol<FuncOp>(dispatchEntryOp.function_ref());
- entryFuncOps.push_back(funcOp);
- entryLocs.push_back(dispatchEntryOp.getLoc());
- }
- }
- auto interfaceLoc = executableBuilder.getFusedLoc(entryLocs);
- auto interfaceOp = executableBuilder.create<IREE::HAL::InterfaceOp>(
- interfaceLoc, "legacy_io");
- OpBuilder interfaceBuilder(interfaceOp);
- interfaceBuilder.setInsertionPointToStart(&interfaceOp.getBlock());
-
- // Add one binding per argument and result. This matches the legacy interface
- // and allows us to keep using the current binding setup on the scheduler
- // side.
- // NOTE: we assume right now that all entry points have the same signature.
- // TODO(benvanik): replace when we have descriptor sets in the HAL IR.
- auto anyFuncOp = entryFuncOps.front();
- int nextBindingOrdinal = 0;
- int pushConstantCount = 0;
- for (auto inputType : llvm::enumerate(anyFuncOp.getType().getInputs())) {
- if (inputType.value().isa<TensorType>()) {
- int bindingOrdinal = nextBindingOrdinal++;
- auto bindingName = "arg" + std::to_string(inputType.index());
- interfaceBuilder.create<IREE::HAL::InterfaceBindingOp>(
- interfaceLoc, bindingName, /*set=*/APInt(64, 0),
- /*binding=*/APInt(64, bindingOrdinal),
- IREE::HAL::DescriptorType::StorageBuffer,
- IREE::HAL::MemoryAccessBitfield::Read);
- } else if (auto indexType = inputType.value().dyn_cast<IndexType>()) {
- ++pushConstantCount;
- } else if (auto integerType = inputType.value().dyn_cast<IntegerType>()) {
- if (integerType.getIntOrFloatBitWidth() != 32) {
- emitError(interfaceLoc)
- << "unsupported argument " << inputType.index() << " bit depth "
- << integerType.getIntOrFloatBitWidth() << " (" << integerType
- << "); only 32-bit values are supported right now";
- return llvm::None;
- }
- ++pushConstantCount;
- } else {
- emitError(interfaceLoc)
- << "unsupported interface function argument " << inputType.index()
- << " type " << inputType.value()
- << "; requires tensors or simple primitive values (i32, etc)";
- return llvm::None;
- }
- }
- for (auto outputType : llvm::enumerate(anyFuncOp.getType().getResults())) {
- int bindingOrdinal = nextBindingOrdinal++;
- auto bindingName = "ret" + std::to_string(outputType.index());
- if (outputType.value().isa<TensorType>()) {
- interfaceBuilder.create<IREE::HAL::InterfaceBindingOp>(
- interfaceLoc, bindingName, /*set=*/APInt(64, 0),
- /*binding=*/APInt(64, bindingOrdinal),
- IREE::HAL::DescriptorType::StorageBuffer,
- IREE::HAL::MemoryAccessBitfield::DiscardWrite);
- } else {
- emitError(interfaceLoc)
- << "unsupported result " << outputType.index() << " type "
- << outputType.value() << "; requires tensor types";
- return llvm::None;
- }
- }
-
- if (pushConstantCount > 0) {
- interfaceOp->setAttr("push_constants",
- interfaceBuilder.getIndexAttr(pushConstantCount));
- }
-
- return interfaceOp;
-}
-
-// Converts a value to/from one supported by the ABI from/to an arbitrary tensor
-// type.
-//
-// Ideally we'd use some type-aware conversion to handle signed/unsigned
-// saturation vs. truncation. As an example, we'd want to zero-extend an
-// unsigned i4 to a signed i8. We also don't want to use HLO ops here, but the
-// standard ops (trunci, zexti, etc) are not supported by subsequent lowerings
-// and just cause pain.
-//
-// Example: `tensor<4xi8>` -> `tensor<4xi1>`
-// or `tensor<4xi1>` -> `tensor<4xi8>`
-static Value convertABITensorType(Location loc, Value sourceValue,
- TensorType targetType, OpBuilder &builder) {
- auto sourceType = sourceValue.getType().cast<TensorType>();
- if (sourceType == targetType) {
- return sourceValue;
- }
- // TODO(benvanik): use a type converter or a dialect interface.
- return builder.createOrFold<mhlo::ConvertOp>(loc, sourceValue,
- targetType.getElementType());
-}
-
-// Creates a new entry function that uses the hal.interface bindings to marshal
-// IO to the original entry function.
-// Invariants:
-// - The thunk function generates loads for entries in the InterfaceOp
-// based on category:
-// 1. Push constants
-// 2. Bindings
-// Within a category, the order follows the order within the interface.
-// Such an ordering can be useful for downstream code generation because
-// it can often be necessary to reference primitives in the materialization
-// of binding-based loads (i.e. for size calculations, etc). For any
-// stronger guarantees or inter-load ordering constraints, downstream
-// code generation must explicitly take non-determinism of argument
-// ordering into account.
-static Optional<FuncOp> createDispatchEntryThunk(
- FuncOp sourceFuncOp, IREE::HAL::InterfaceOp interfaceOp,
- IREE::HAL::ExecutableTargetOp targetOp) {
- // Clone the source FuncOp into the target then manipulate it into a
- // dispatch entry thunk.
- auto clonedFuncOp = sourceFuncOp.clone();
- targetOp.getInnerModule().push_back(clonedFuncOp);
-
- // Functions take all I/O through the interface API.
- auto sourceFuncType = clonedFuncOp.getType();
- auto thunkFuncType = FunctionType::get(clonedFuncOp.getContext(), {}, {});
- auto thunkFuncOp = FuncOp::create(clonedFuncOp.getLoc(),
- clonedFuncOp.getName(), thunkFuncType);
- clonedFuncOp.setName((clonedFuncOp.getName() + "_impl").str());
- clonedFuncOp.setPrivate();
- clonedFuncOp->getParentRegion()->getBlocks().front().push_front(thunkFuncOp);
-
- // For now we only support tensor types, so bindings are in order.
- // In the future we will want to provide N:M mappings (as well as the
- // information to compute offsets).
- int binding = 0;
- auto bindingOps = llvm::to_vector<4>(
- interfaceOp.getBlock().getOps<IREE::HAL::InterfaceBindingOp>());
-
- // Pull all arguments from the bindings.
- auto *thunkEntryBlock = thunkFuncOp.addEntryBlock();
- OpBuilder thunkEntryBuilder = OpBuilder::atBlockEnd(thunkEntryBlock);
- Operation *firstNonConstOp = nullptr;
- auto positionForNonConst = [&]() {
- thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
- };
- auto positionForConst = [&]() {
- if (firstNonConstOp) {
- thunkEntryBuilder.setInsertionPoint(firstNonConstOp);
- } else {
- thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
- }
- };
-
- // Create load ops, first for push constants with binding based loads after.
- auto zeroOffset = thunkEntryBuilder.createOrFold<mlir::ConstantIndexOp>(
- thunkFuncOp.getLoc(), 0);
- SmallVector<Value, 4> operands;
- int pushConstantOffset = 0;
- for (auto inputType : sourceFuncType.getInputs()) {
- if (auto sourceType = inputType.dyn_cast<TensorType>()) {
- positionForNonConst();
- auto bindingOp = bindingOps[binding++];
- auto targetType = convertTensorTypeToABIType(sourceType);
- auto loadOp = thunkEntryBuilder.create<IREE::HAL::InterfaceLoadTensorOp>(
- thunkFuncOp.getLoc(), targetType,
- thunkEntryBuilder.getSymbolRefAttr(
- interfaceOp.sym_name(),
- {thunkEntryBuilder.getSymbolRefAttr(bindingOp)}),
- zeroOffset);
- Value abiValue =
- convertABITensorType(thunkFuncOp.getLoc(), loadOp.getResult(),
- sourceType, thunkEntryBuilder);
- if (!abiValue) {
- clonedFuncOp.emitError()
- << "function argument type " << inputType
- << " cannot be converted to a HAL ABI type " << targetType;
- return llvm::None;
- }
- operands.push_back(abiValue);
- firstNonConstOp = loadOp;
- } else if (inputType.isa<IndexType>() || inputType.isa<IntegerType>()) {
- positionForConst();
- auto loadOp =
- thunkEntryBuilder.create<IREE::HAL::InterfaceLoadConstantOp>(
- thunkFuncOp.getLoc(), inputType, APInt(64, pushConstantOffset));
- operands.push_back(loadOp.getResult());
- ++pushConstantOffset;
- } else {
- clonedFuncOp.emitError() << "function argument type " << inputType
- << " is not valid for interface I/O";
- return llvm::None;
- }
- }
- thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
-
- // Call the original entry function.
- auto callOp = thunkEntryBuilder.create<mlir::CallOp>(thunkFuncOp.getLoc(),
- clonedFuncOp, operands);
-
- // Push all results to the bindings.
- for (auto resultTypeValue :
- llvm::zip(sourceFuncType.getResults(), callOp.getResults())) {
- auto sourceType = std::get<0>(resultTypeValue).cast<TensorType>();
- auto targetType = convertTensorTypeToABIType(sourceType);
- Value resultValue = std::get<1>(resultTypeValue);
- Value abiValue = convertABITensorType(thunkFuncOp.getLoc(), resultValue,
- targetType, thunkEntryBuilder);
- if (!abiValue) {
- clonedFuncOp.emitError()
- << "function result type " << resultValue.getType()
- << " cannot be converted from HAL ABI type " << targetType;
- return llvm::None;
- }
- auto bindingOp = bindingOps[binding++];
- thunkEntryBuilder.create<IREE::HAL::InterfaceStoreTensorOp>(
- thunkFuncOp.getLoc(), abiValue,
- thunkEntryBuilder.getSymbolRefAttr(
- interfaceOp.sym_name(),
- {thunkEntryBuilder.getSymbolRefAttr(bindingOp)}),
- zeroOffset);
- }
- thunkEntryBuilder.create<mlir::ReturnOp>(thunkFuncOp.getLoc());
-
- return thunkFuncOp;
-}
-
-// Adds the entry point ops with assigned ordinals for each entry function.
-// The entry points will all use the provided |interfaceOp|.
-static LogicalResult declareEntryPointOps(
- IREE::Flow::ExecutableOp sourceExecutableOp,
- IREE::HAL::ExecutableOp targetExecutableOp,
- IREE::HAL::InterfaceOp interfaceOp) {
- auto targetOps =
- targetExecutableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>();
- for (auto targetOp : targetOps) {
- OpBuilder builder(&targetOp.getBlock().front());
-
- // For each Flow entry point, create a HAL entry point and dispatch thunk.
- int nextOrdinal = 0;
- for (auto &op : sourceExecutableOp.getBlock()) {
- if (auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
- auto sourceFuncOp =
- sourceExecutableOp.getInnerModule().lookupSymbol<FuncOp>(
- dispatchEntryOp.function_ref());
-
- auto thunkFuncOp =
- createDispatchEntryThunk(sourceFuncOp, interfaceOp, targetOp);
- if (!thunkFuncOp.hasValue()) {
- return failure();
- }
- dispatchEntryOp->setAttr(
- "function_ref", builder.getSymbolRefAttr(thunkFuncOp.getValue()));
-
- builder.create<IREE::HAL::ExecutableEntryPointOp>(
- dispatchEntryOp.getLoc(),
- builder.getStringAttr(dispatchEntryOp.function_ref()),
- builder.getIndexAttr(nextOrdinal++),
- builder.getSymbolRefAttr(interfaceOp),
- TypeAttr::get(sourceFuncOp.getType()), ArrayAttr{});
- }
- }
-
- // Copy interface bindings into the target module so symbol references work.
- auto inlinedInterfaceOp = interfaceOp.clone();
- inlinedInterfaceOp.setPrivate();
- targetOp.getInnerModule().push_back(inlinedInterfaceOp);
- }
- return success();
-}
-
-// Creates zero or more hal.executable.target ops for each target backend.
-// The source op will contain the flow.executable contents and any attributes
-// the backend wants to carry along during transformation.
-static LogicalResult declareTargetOps(TargetOptions targetOptions,
- IREE::Flow::ExecutableOp sourceOp,
- IREE::HAL::ExecutableOp executableOp) {
- // The user has specified what targets they want as a set of patterns. This
- // matches against those patterns so vulkan-* may match vulkan-v1.1 and
- // vulkan-v1.2.
- auto targetBackends = matchTargetBackends(targetOptions.targets);
- if (targetBackends.empty()) {
- auto diagnostic = sourceOp.emitError();
- diagnostic
- << "no target backends available for executable translation; ensure "
- << "they are linked in and the target options are properly "
- << "specified. requested = [ ";
- for (const auto &target : targetOptions.targets) {
- diagnostic << "'" << target << "' ";
- }
- diagnostic << "], available = [ ";
- for (const auto &target : getRegisteredTargetBackends()) {
- diagnostic << "'" << target << "' ";
- }
- diagnostic << "]";
- return diagnostic;
- }
-
- // Materialize all of the hal.executable.target ops for all backends we are
- // targeting. Note that each backend may create zero or more target ops.
- for (auto &targetBackend : targetBackends) {
- targetBackend->declareTargetOps(sourceOp, executableOp);
- }
-
- // Ensure that at least one target op got created. If it didn't that means
- // the executable cannot be translated and it's better to fail now.
- if (executableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>().empty()) {
- auto diagnostic = sourceOp.emitError();
- diagnostic
- << "no target backend was able to handle this executable; tried = [ ";
- for (const auto &target : targetOptions.targets) {
- diagnostic << "'" << target << "' ";
- }
- diagnostic << "]";
- return diagnostic;
- }
-
- return success();
-}
-
-class MaterializeInterfacesPass
- : public PassWrapper<MaterializeInterfacesPass, OperationPass<ModuleOp>> {
- public:
- explicit MaterializeInterfacesPass(TargetOptions targetOptions)
- : targetOptions_(targetOptions) {}
-
- void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<IREE::HAL::HALDialect>();
-
- auto targetBackends = matchTargetBackends(targetOptions_.targets);
- for (auto &targetBackend : targetBackends) {
- targetBackend->getDependentDialects(registry);
- }
- }
-
- void runOnOperation() override {
- // Processes all executables within the input module and produces the output
- // HAL ops. We should ensure all deduping is performed prior to this when
- // it's easier to diff IR and where we still have the flow context.
- auto sourceOps =
- llvm::to_vector<32>(getOperation().getOps<IREE::Flow::ExecutableOp>());
- for (auto sourceOp : sourceOps) {
- // Don't touch tiled executables as the new pass takes care of that.
- auto entryOps = sourceOp.getOps<IREE::Flow::DispatchEntryOp>();
- if (entryOps.empty()) continue;
- auto anyEntryOp = *entryOps.begin();
- if (anyEntryOp.workgroup_rank().hasValue()) {
- continue;
- }
-
- // Create the op that will contain the translated executables.
- OpBuilder builder = OpBuilder::atBlockEnd(getOperation().getBody());
- builder.setInsertionPointAfter(sourceOp);
- auto executableOp = builder.create<IREE::HAL::ExecutableOp>(
- sourceOp.getLoc(), sourceOp.getName());
- executableOp.setVisibility(sourceOp.getVisibility());
-
- // Add IO ops to define the bindings and how parameters are passed.
- auto interfaceOp = declareInterfaceIO(sourceOp, executableOp);
- if (!interfaceOp.hasValue()) {
- return signalPassFailure();
- }
-
- // Embed the hal.executable.target ops for each source.
- if (failed(declareTargetOps(targetOptions_, sourceOp, executableOp))) {
- return signalPassFailure();
- }
-
- // Annotate the entry points.
- // TODO(benvanik): allow entry points to use different interfaces.
- if (failed(declareEntryPointOps(sourceOp, executableOp,
- interfaceOp.getValue()))) {
- return signalPassFailure();
- }
-
- sourceOp.erase();
- }
- }
-
- private:
- TargetOptions targetOptions_;
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfacesPass(
- TargetOptions targetOptions) {
- return std::make_unique<MaterializeInterfacesPass>(targetOptions);
-}
-
-static PassRegistration<MaterializeInterfacesPass> pass(
- "iree-hal-materialize-interfaces",
- "Materializes hal.executable ops from flow.executable ops (DEPRECATED)",
- [] {
- auto options = getTargetOptionsFromFlags();
- return std::make_unique<MaterializeInterfacesPass>(options);
- });
-
-} // namespace HAL
-} // namespace IREE
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
index bc14850..ed3d498 100644
--- a/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
@@ -66,7 +66,6 @@
// Each executable needs a hal.interface to specify how the host and device
// communicate across the ABI boundary.
passManager.addPass(createMaterializeInterfaces2Pass(targetOptions));
- passManager.addPass(createMaterializeInterfacesPass(targetOptions));
passManager.nest<ExecutableOp>().addNestedPass<ExecutableTargetOp>(
createPropagateConstantWorkgroupInfoPass());
diff --git a/iree/compiler/Dialect/HAL/Transforms/Passes.h b/iree/compiler/Dialect/HAL/Transforms/Passes.h
index ee1200c..589e321 100644
--- a/iree/compiler/Dialect/HAL/Transforms/Passes.h
+++ b/iree/compiler/Dialect/HAL/Transforms/Passes.h
@@ -72,8 +72,6 @@
// Defines hal.executables and hal.interfaces for flow.executable ops based on
// usage within the module. Target backends are queried to check for support and
// device placements are made.
-std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfacesPass(
- TargetOptions targetOptions);
std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfaces2Pass(
TargetOptions targetOptions);
@@ -152,7 +150,6 @@
createBenchmarkBatchDispatchesPass(/*repeatCount=*/1);
createInlineDeviceSwitchesPass();
createMemoizeDeviceQueriesPass();
- createMaterializeInterfacesPass(targetOptions);
createTranslateExecutablesPass(targetOptions);
createLinkExecutablesPass(targetOptions);
createResolveEntryPointOrdinalsPass();
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/BUILD b/iree/compiler/Dialect/HAL/Transforms/test/BUILD
index b926056..05e537f 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/BUILD
+++ b/iree/compiler/Dialect/HAL/Transforms/test/BUILD
@@ -30,7 +30,6 @@
"identify_constant_pools.mlir",
"inline_device_switches.mlir",
"materialize_constant_pool_buffers.mlir",
- "materialize_interfaces.mlir",
"materialize_interfaces2.mlir",
"materialize_resource_caches.mlir",
"memoize_device_queries.mlir",
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt b/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
index 8515a64..925a3de 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
@@ -19,7 +19,6 @@
"identify_constant_pools.mlir"
"inline_device_switches.mlir"
"materialize_constant_pool_buffers.mlir"
- "materialize_interfaces.mlir"
"materialize_interfaces2.mlir"
"materialize_resource_caches.mlir"
"memoize_device_queries.mlir"
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
deleted file mode 100644
index c6ac12e..0000000
--- a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ /dev/null
@@ -1,109 +0,0 @@
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file -iree-hal-materialize-interfaces -iree-hal-target-backends=vmvx %s | IreeFileCheck %s
-
-// CHECK-LABEL: hal.executable @simpleMath_ex_dispatch_0
-// CHECK-DAG: hal.interface @legacy_io {
-// CHECK-NEXT: hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
-// CHECK-NEXT: }
-// CHECK-DAG: hal.executable.target @vmvx, filter="vmvx" {
-// CHECK-DAG: hal.executable.entry_point @simpleMath_rgn_dispatch_0 attributes {
-// CHECK-SAME: interface = @legacy_io,
-// CHECK-SAME: ordinal = 0 : index,
-// CHECK-SAME: signature = (tensor<4xf32>) -> tensor<4xf32>
-// CHECK-SAME: }
-flow.executable @simpleMath_ex_dispatch_0 {
- flow.dispatch.entry @simpleMath_rgn_dispatch_0 attributes {
- workload = 4 : index
- }
- // CHECK: module {
- module {
- // CHECK-NEXT: func @simpleMath_rgn_dispatch_0()
- // CHECK-NEXT: %[[ZERO:.+]] = constant 0 : index
- // CHECK-NEXT: %[[ARG0:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<4xf32>
- // CHECK-NEXT: %[[RET0:.+]] = call @simpleMath_rgn_dispatch_0_impl(%[[ARG0]]) : (tensor<4xf32>) -> tensor<4xf32>
- // CHECK-NEXT: hal.interface.store.tensor %[[RET0]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<4xf32>
- // CHECK-NEXT: return
- // CHECK-NEXT: }
- // CHECK-NEXT: func private @simpleMath_rgn_dispatch_0_impl
- func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
- %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
- return %0 : tensor<4xf32>
- }
- // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
- }
-}
-
-// -----
-
-// CHECK-LABEL: hal.executable @bools_ex_dispatch_0
-// CHECK-DAG: hal.interface @legacy_io {
-// CHECK-NEXT: hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-// CHECK-NEXT: }
-// CHECK-DAG: hal.executable.target @vmvx, filter="vmvx" {
-// CHECK-DAG: hal.executable.entry_point @bools_rgn_dispatch_0 attributes {
-// CHECK-SAME: interface = @legacy_io,
-// CHECK-SAME: ordinal = 0 : index,
-// CHECK-SAME: signature = (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1>
-// CHECK-SAME: }
-flow.executable @bools_ex_dispatch_0 {
- flow.dispatch.entry @bools_rgn_dispatch_0 attributes {
- workload = 4 : index
- }
- // CHECK: module {
- module {
- // CHECK-NEXT: func @bools_rgn_dispatch_0()
- // CHECK-DAG: %[[ZERO:.+]] = constant 0 : index
- // CHECK-DAG: %[[ARG0_I8:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<4xi8>
- // CHECK-DAG: %[[ARG0_I1:.+]] = "mhlo.convert"(%[[ARG0_I8]]) : (tensor<4xi8>) -> tensor<4xi1>
- // CHECK-DAG: %[[ARG1_I8:.+]] = hal.interface.load.tensor @legacy_io::@arg1, offset = %[[ZERO]] : tensor<4xi8>
- // CHECK-DAG: %[[ARG1_I1:.+]] = "mhlo.convert"(%[[ARG1_I8]]) : (tensor<4xi8>) -> tensor<4xi1>
- // CHECK-NEXT: %[[RET0_I1:.+]] = call @bools_rgn_dispatch_0_impl(%[[ARG0_I1]], %[[ARG1_I1]]) : (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1>
- // CHECK-NEXT: %[[RET0_I8:.+]] = "mhlo.convert"(%[[RET0_I1]]) : (tensor<4xi1>) -> tensor<4xi8>
- // CHECK-NEXT: hal.interface.store.tensor %[[RET0_I8]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<4xi8>
- // CHECK-NEXT: return
- // CHECK-NEXT: }
- // CHECK-NEXT: func private @bools_rgn_dispatch_0_impl(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4xi1>
- func @bools_rgn_dispatch_0(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4xi1> {
- %0 = mhlo.and %arg0, %arg1 : tensor<4xi1>
- %c = mhlo.constant dense<[false, false, true, false]> : tensor<4xi1>
- %1 = mhlo.and %0, %c : tensor<4xi1>
- return %1 : tensor<4xi1>
- }
- // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
- }
-}
-
-// -----
-
-// CHECK-LABEL: hal.executable @shaped_dispatch
-// CHECK-NEXT: hal.interface @legacy_io attributes {push_constants = 2 : index} {
-// CHECK-NEXT: hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
-// CHECK-NEXT: }
-flow.executable @shaped_dispatch {
- flow.dispatch.entry @entry
- // CHECK: module {
- module {
- // CHECK: func @entry() {
- // CHECK-NEXT: %[[ZERO:.+]] = constant 0 : index
- // Invariant: Constant loads emitted before binding (tensor) loads.
- // CHECK-NEXT: %[[DIM0:.+]] = hal.interface.load.constant offset = 0 : index
- // CHECK-NEXT: %[[DIM1:.+]] = hal.interface.load.constant offset = 1 : index
- // CHECK-NEXT: %[[ARG0:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<?x7x10xf32>
- // CHECK-NEXT: %[[RET0:.+]] = call @entry_impl(%[[ARG0]], %[[DIM0]], %[[DIM1]]) : (tensor<?x7x10xf32>, index, index) -> tensor<7x?x10xf32>
- // CHECK-NEXT: hal.interface.store.tensor %[[RET0]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<7x?x10xf32>
- // CHECK-NEXT: return
- // CHECK-NEXT: }
- // CHECK-NEXT: func private @entry_impl
- func @entry(%arg0: tensor<?x7x10xf32>, %arg1: index, %arg2: index) -> tensor<7x?x10xf32> {
- %0 = shapex.make_ranked_shape %arg1 : (index) -> !shapex.ranked_shape<[?,7,10]>
- %1 = shapex.make_ranked_shape %arg2 : (index) -> !shapex.ranked_shape<[7,?,10]>
- %2 = shapex.tie_shape %arg0, %0 : tensor<?x7x10xf32>, !shapex.ranked_shape<[?,7,10]>
- %3 = "mhlo.transpose"(%2) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<?x7x10xf32>) -> tensor<7x?x10xf32>
- %4 = shapex.tie_shape %3, %1 : tensor<7x?x10xf32>, !shapex.ranked_shape<[7,?,10]>
- return %4 : tensor<7x?x10xf32>
- }
- }
-}
diff --git a/iree/compiler/Translation/IREEVM.cpp b/iree/compiler/Translation/IREEVM.cpp
index 07dc6ac..e2028d5 100644
--- a/iree/compiler/Translation/IREEVM.cpp
+++ b/iree/compiler/Translation/IREEVM.cpp
@@ -142,15 +142,7 @@
IREE::TFLite::buildTransformPassPipeline(passManager);
}
IREE::Flow::buildInputTransformPassPipeline(passManager);
- // VMLA must go with legacy path.
- bool dispatchLinalgOnTensors = true;
- if (std::find(executableOptions.targets.begin(),
- executableOptions.targets.end(),
- "vmla") != executableOptions.targets.end()) {
- dispatchLinalgOnTensors = false;
- }
- IREE::Flow::buildFlowTransformPassPipeline(passManager,
- dispatchLinalgOnTensors);
+ IREE::Flow::buildFlowTransformPassPipeline(passManager);
IREE::HAL::buildHALTransformPassPipeline(passManager, executableOptions);
IREE::VM::buildVMTransformPassPipeline(passManager, targetOptions);
passManager.addPass(mlir::iree_compiler::IREE::createDropCompilerHintsPass());
diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD
index 65fe4c5..6b326cd 100644
--- a/iree/test/e2e/regression/BUILD
+++ b/iree/test/e2e/regression/BUILD
@@ -42,7 +42,6 @@
"dynamic_torch_index_select_negative.mlir",
"dynamic_torch_index_select_scalar.mlir",
"dynamic_torch_index_select_vector.mlir",
- "executable_benchmark.mlir",
"globals.mlir",
"scalar.mlir",
"trace_dispatch_tensors.mlir",
diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt
index 0c9985b..269a553 100644
--- a/iree/test/e2e/regression/CMakeLists.txt
+++ b/iree/test/e2e/regression/CMakeLists.txt
@@ -22,7 +22,6 @@
"dynamic_torch_index_select_negative.mlir"
"dynamic_torch_index_select_scalar.mlir"
"dynamic_torch_index_select_vector.mlir"
- "executable_benchmark.mlir"
"globals.mlir"
"scalar.mlir"
"trace_dispatch_tensors.mlir"
diff --git a/iree/test/e2e/regression/executable_benchmark.mlir b/iree/test/e2e/regression/executable_benchmark.mlir
deleted file mode 100644
index 5490ec4..0000000
--- a/iree/test/e2e/regression/executable_benchmark.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// Only checks registered benchmarks.
-// RUN: iree-translate --iree-hal-target-backends=vmvx -iree-flow-export-benchmark-funcs -iree-mlir-to-vm-bytecode-module %s | iree-benchmark-module --driver=vmvx --benchmark_list_tests=true | IreeFileCheck %s
-
-func @two_dispatch() -> (tensor<5x5xf32>, tensor<3x5xf32>) attributes { iree.module.export } {
- %0 = iree.unfoldable_constant dense<1.0> : tensor<5x3xf32>
- %1 = iree.unfoldable_constant dense<0.4> : tensor<3x5xf32>
- %2 = "mhlo.dot"(%0, %1) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
- %3 = "mhlo.dot"(%1, %2) : (tensor<3x5xf32>, tensor<5x5xf32>) -> tensor<3x5xf32>
- return %2, %3 : tensor<5x5xf32>, tensor<3x5xf32>
-}
-// CHECK: BM_two_dispatch_dispatch_0_benchmark
-// CHECK: BM_two_dispatch_dispatch_1_benchmark
-// CHECK: BM_two_dispatch_benchmark
diff --git a/iree/tools/BUILD b/iree/tools/BUILD
index 7ff2cda..9238ef5 100644
--- a/iree/tools/BUILD
+++ b/iree/tools/BUILD
@@ -83,7 +83,6 @@
deps = [
"//iree/compiler/Bindings/Native/Transforms",
"//iree/compiler/Bindings/TFLite/Transforms",
- "//iree/compiler/Dialect/Flow/Analysis",
"//iree/compiler/Dialect/Flow/IR",
"//iree/compiler/Dialect/Flow/Transforms",
"//iree/compiler/Dialect/HAL/IR:HALDialect",
diff --git a/iree/tools/CMakeLists.txt b/iree/tools/CMakeLists.txt
index b8bed15..44c1d92 100644
--- a/iree/tools/CMakeLists.txt
+++ b/iree/tools/CMakeLists.txt
@@ -153,7 +153,6 @@
MLIRIR
iree::compiler::Bindings::Native::Transforms
iree::compiler::Bindings::TFLite::Transforms
- iree::compiler::Dialect::Flow::Analysis
iree::compiler::Dialect::Flow::IR
iree::compiler::Dialect::Flow::Transforms
iree::compiler::Dialect::HAL::IR::HALDialect
diff --git a/iree/tools/init_iree_passes.h b/iree/tools/init_iree_passes.h
index b52f61b..d3a283e 100644
--- a/iree/tools/init_iree_passes.h
+++ b/iree/tools/init_iree_passes.h
@@ -24,7 +24,6 @@
#include "iree/compiler/Bindings/Native/Transforms/Passes.h"
#include "iree/compiler/Bindings/TFLite/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Analysis/TestPasses.h"
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
#include "iree/compiler/Dialect/HAL/Transforms/Passes.h"
#include "iree/compiler/Dialect/IREE/Transforms/Passes.h"
@@ -47,7 +46,6 @@
IREE::TFLite::registerTransformPassPipeline();
IREE::Flow::registerFlowPasses();
- IREE::Flow::registerFlowAnalysisTestPasses();
IREE::HAL::registerHALPasses();
IREE::registerTransformPasses();
Shape::registerShapeConversionPasses();