Removing a lot of compiler code that was only kept live by VMLA. (#5908)

Concretely, this drops the DecomposeHLOClamp pass, the Flow dispatchability
analysis (Dispatchability.{h,cpp} and its test pass), the legacy
flow.dispatch.region op (definition, builders, parser/printer, folders, and
lit tests), and the DispatchConfig, DispatchabilityAnalysis,
IdentifyDispatchRegions2, FoldCompatibleDispatchRegions, and
OutlineDispatchRegions passes, along with their BUILD/CMake entries.

diff --git a/iree/compiler/Conversion/HLOToHLO/BUILD b/iree/compiler/Conversion/HLOToHLO/BUILD
index e874421..ed1236c 100644
--- a/iree/compiler/Conversion/HLOToHLO/BUILD
+++ b/iree/compiler/Conversion/HLOToHLO/BUILD
@@ -21,7 +21,6 @@
 cc_library(
     name = "HLOToHLO",
     srcs = [
-        "DecomposeHLOClamp.cpp",
         "DemoteF32ToF16.cpp",
     ],
     hdrs = [
@@ -35,6 +34,5 @@
         "@llvm-project//mlir:Pass",
         "@llvm-project//mlir:Support",
         "@llvm-project//mlir:TransformUtils",
-        "@mlir-hlo//:hlo",
     ],
 )
diff --git a/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt b/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
index acd6747..68eeb96 100644
--- a/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
+++ b/iree/compiler/Conversion/HLOToHLO/CMakeLists.txt
@@ -16,7 +16,6 @@
   HDRS
     "Passes.h"
   SRCS
-    "DecomposeHLOClamp.cpp"
     "DemoteF32ToF16.cpp"
   DEPS
     LLVMSupport
@@ -26,7 +25,6 @@
     MLIRTransformUtils
     iree::compiler::Dialect::Flow::IR
     iree::compiler::Dialect::IREE::IR
-    tensorflow::mlir_hlo
   PUBLIC
 )
 
diff --git a/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp b/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp
deleted file mode 100644
index d294d3e..0000000
--- a/iree/compiler/Conversion/HLOToHLO/DecomposeHLOClamp.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-namespace {
-/// A pass to decompose mhlo.clamp ops into mhlo.compare and
-/// mhlo.select ops.
-class DecomposeClampOp : public OpRewritePattern<mhlo::ClampOp> {
- public:
-  using OpRewritePattern<mhlo::ClampOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(mhlo::ClampOp op,
-                                PatternRewriter &rewriter) const override {
-    auto minType = op.min().getType().dyn_cast<RankedTensorType>();
-    auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
-    auto maxType = op.max().getType().dyn_cast<RankedTensorType>();
-
-    if (!operandType) return failure();
-
-    // Reject implicitly broadcasted cases. They should be made explicit first.
-    if (minType != operandType || maxType != operandType) return failure();
-
-    // clamp(a, x, b) = min(max(a, x), b)
-    Location loc = op.getLoc();
-    Value cmpMin = rewriter.create<mhlo::CompareOp>(
-        loc, op.min(), op.operand(), rewriter.getStringAttr("LT"));
-    Value selectMin = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMin,
-                                                      op.operand(), op.min());
-    Value cmpMax = rewriter.create<mhlo::CompareOp>(
-        loc, selectMin, op.max(), rewriter.getStringAttr("LT"));
-    Value selectMax = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMax,
-                                                      selectMin, op.max());
-    rewriter.replaceOp(op, selectMax);
-    return success();
-  }
-};
-
-struct DecomposeHLOClampPass
-    : public PassWrapper<DecomposeHLOClampPass, FunctionPass> {
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<mhlo::MhloDialect>();
-  }
-
-  void runOnFunction() override {
-    MLIRContext *context = &getContext();
-    OwningRewritePatternList patterns(&getContext());
-    patterns.insert<DecomposeClampOp>(context);
-    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-  }
-};
-}  // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> createDecomposeHLOClampPass() {
-  return std::make_unique<DecomposeHLOClampPass>();
-}
-
-static PassRegistration<DecomposeHLOClampPass> pass(
-    "iree-codegen-decompose-hlo-clamp",
-    "Decompose HLO clamp op into primitive ops");
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp b/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
index d465f5f..8a11b0b 100644
--- a/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
+++ b/iree/compiler/Conversion/HLOToHLO/DemoteF32ToF16.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
diff --git a/iree/compiler/Conversion/HLOToHLO/Passes.h b/iree/compiler/Conversion/HLOToHLO/Passes.h
index 5edbcb2..d0da837 100644
--- a/iree/compiler/Conversion/HLOToHLO/Passes.h
+++ b/iree/compiler/Conversion/HLOToHLO/Passes.h
@@ -30,9 +30,6 @@
 namespace mlir {
 namespace iree_compiler {
 
-/// Creates a pass to decompose XLA-HLO clamp ops into primitive ops.
-std::unique_ptr<OperationPass<FuncOp>> createDecomposeHLOClampPass();
-
 /// Create a pass to convert a model using f32 type to the equivalent one
 /// using 16.
 std::unique_ptr<OperationPass<ModuleOp>> createDemoteF32ToF16Pass();
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/BUILD b/iree/compiler/Conversion/HLOToLinalg/test/BUILD
index 063de7e..2998812 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/BUILD
+++ b/iree/compiler/Conversion/HLOToLinalg/test/BUILD
@@ -28,7 +28,6 @@
     srcs = enforce_glob(
         [
             "concatenate.mlir",
-            "decompose_hlo_clamp.mlir",
             "dynamic_shape.mlir",
             "fft.mlir",
             "fusion.mlir",
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt b/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
index af81009..f4fc612 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
+++ b/iree/compiler/Conversion/HLOToLinalg/test/CMakeLists.txt
@@ -15,7 +15,6 @@
     lit
   SRCS
     "concatenate.mlir"
-    "decompose_hlo_clamp.mlir"
     "dynamic_shape.mlir"
     "fft.mlir"
     "fusion.mlir"
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir b/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
deleted file mode 100644
index a706486..0000000
--- a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-codegen-decompose-hlo-clamp %s | IreeFileCheck %s
-
-// CHECK-LABEL: func @clamp
-// CHECK-SAME: (%[[MIN:.+]]: tensor<4xf32>, %[[INPUT:.+]]: tensor<4xf32>, %[[MAX:.+]]: tensor<4xf32>)
-func @clamp(%min: tensor<4xf32>, %value: tensor<4xf32>, %max: tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK: %[[CMP_MIN:.+]] = "mhlo.compare"(%[[MIN]], %[[INPUT]]) {comparison_direction = "LT"}
-  // CHECK: %[[SLT_MIN:.+]] = "mhlo.select"(%[[CMP_MIN]], %[[INPUT]], %[[MIN]])
-  // CHECK: %[[CMP_MAX:.+]] = "mhlo.compare"(%[[SLT_MIN]], %[[MAX]]) {comparison_direction = "LT"}
-  // CHECK: %[[SLT_MAX:.+]] = "mhlo.select"(%[[CMP_MAX]], %[[SLT_MIN]], %[[MAX]])
-  // CHECK: return %[[SLT_MAX]]
-  %0 = "mhlo.clamp"(%min, %value, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 42e8e0b..c5453c1 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -44,7 +44,6 @@
 
 inline void registerHLOToLinalgPasses() {
   static bool init_once = []() {
-    createDecomposeHLOClampPass();
     createHLOToLinalgOnTensorsPass();
     createDemoteF32ToF16Pass();
     return true;
diff --git a/iree/compiler/Dialect/Flow/Analysis/BUILD b/iree/compiler/Dialect/Flow/Analysis/BUILD
deleted file mode 100644
index ccda835..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/BUILD
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-cc_library(
-    name = "Analysis",
-    srcs = [
-        "Dispatchability.cpp",
-        "DispatchabilityTest.cpp",
-    ],
-    hdrs = [
-        "Dispatchability.h",
-        "TestPasses.h",
-    ],
-    deps = [
-        "//iree/compiler/Dialect/Flow/IR",
-        "//iree/compiler/Dialect/Flow/Utils",
-        "//iree/compiler/Dialect/IREE/IR",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@mlir-hlo//:hlo",
-    ],
-)
diff --git a/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt b/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt
deleted file mode 100644
index db8f0fc..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/CMakeLists.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/Flow/Analysis/BUILD                                    #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_cc_library(
-  NAME
-    Analysis
-  HDRS
-    "Dispatchability.h"
-    "TestPasses.h"
-  SRCS
-    "Dispatchability.cpp"
-    "DispatchabilityTest.cpp"
-  DEPS
-    LLVMSupport
-    MLIRIR
-    MLIRPass
-    MLIRStandard
-    MLIRSupport
-    iree::compiler::Dialect::Flow::IR
-    iree::compiler::Dialect::Flow::Utils
-    iree::compiler::Dialect::IREE::IR
-    tensorflow::mlir_hlo
-  PUBLIC
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
deleted file mode 100644
index 53b2b6f..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-
-#include <list>
-
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "llvm/ADT/SetVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/SymbolTable.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// static
-LogicalResult Dispatchability::annotateIR(ModuleOp moduleOp) {
-  Dispatchability dispatchability;
-  if (failed(dispatchability.recalculate(moduleOp))) {
-    moduleOp.emitOpError()
-        << "failed to analyze dispatchability for the module";
-    return failure();
-  }
-
-  Builder builder(moduleOp.getContext());
-  SymbolTable symbolTable(moduleOp);
-  for (auto &funcDispatchability : dispatchability.funcDispatchability_) {
-    auto funcOp = symbolTable.lookup<FuncOp>(funcDispatchability.first);
-    funcOp->setAttr("dispatchable",
-                    builder.getBoolAttr(funcDispatchability.second));
-  }
-
-  return success();
-}
-
-LogicalResult Dispatchability::recalculate(ModuleOp moduleOp) {
-  funcDispatchability_.clear();
-  funcCloneModuleOp_ = ModuleOp::create(UnknownLoc::get(moduleOp.getContext()));
-  funcClones_.clear();
-
-  // Run through all functions until we are able to compute their
-  // dispatchability. We do this so that we can determine if calls are allowed.
-  OpBuilder cloneBuilder(funcCloneModuleOp_.get());
-  std::vector<FuncOp> nextWorklist(moduleOp.getOps<FuncOp>().begin(),
-                                   moduleOp.getOps<FuncOp>().end());
-  std::vector<FuncOp> worklist;
-  bool anyChanged;
-  do {
-    anyChanged = false;
-    worklist.swap(nextWorklist);
-    nextWorklist.clear();
-    for (auto funcOp : worklist) {
-      auto isDispatchable = computeDispatchability(funcOp);
-      if (isDispatchable.hasValue()) {
-        funcDispatchability_[funcOp.getName()] = isDispatchable.getValue();
-        if (isDispatchable.getValue()) {
-          auto clonedFuncOp = cast<FuncOp>(cloneBuilder.clone(*funcOp));
-          funcClones_[funcOp.getName()] = clonedFuncOp;
-          funcCloneModuleOp_->push_back(clonedFuncOp);
-        }
-        anyChanged = true;
-      } else {
-        nextWorklist.push_back(funcOp);
-      }
-    }
-  } while (anyChanged);
-  if (!nextWorklist.empty()) {
-    return moduleOp.emitError() << "cycle detected in dispatchability analysis";
-  }
-
-  return success();
-}
-
-Optional<bool> Dispatchability::computeDispatchability(FuncOp funcOp) {
-  if (funcOp.isExternal()) {
-    // We assume all imports have side-effects right now, but that may not be
-    // the case. We should add an attribute and check for it.
-    return false;
-  }
-
-  // TODO(b/144530470): replace with tablegen attributes/interfaces.
-  for (auto &block : funcOp.getBlocks()) {
-    for (auto &op : block.getOperations()) {
-      if (!IREE::Flow::isOpOfKnownDialect(&op)) {
-        // Custom dialects aren't dispatchable (yet).
-        return false;
-      } else if (auto callOp = dyn_cast<CallOp>(op)) {
-        if (callOp.getCallee() == funcOp.getName()) {
-          // Recursion.
-          continue;
-        }
-        auto it = funcDispatchability_.find(callOp.callee());
-        if (it == funcDispatchability_.end()) {
-          // Not yet calculated - yield.
-          return llvm::None;
-        }
-        return it->second;
-      } else if (isa<CallIndirectOp>(op)) {
-        // Indirect calls are not supported and must first be devirtualized.
-        return false;
-      } else if (isa<mlir::ReturnOp>(op)) {
-        // TODO(benvanik): widen to all known terminators? sometimes they may
-        // have side-effects.
-        continue;
-      } else if (isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op)) {
-        // Some unfusable ops must remain on their own.
-        return false;
-      } else if (isa<mhlo::ReduceOp>(op) || isa<mhlo::ReduceWindowOp>(op)) {
-        // Reductions always become flow ops.
-        return false;
-
-        // TODO: Properly handle region side effects.
-      } else if (!MemoryEffectOpInterface::hasNoEffect(&op) ||
-                 op.getNumRegions() != 0) {
-        // Ops with side-effects cannot be dispatched as we must be able to
-        // exactly model I/O.
-        return false;
-      }
-    }
-  }
-
-  // All cases not handled above are (probably) dispatchable. This makes what we
-  // do here a blocklist, though as we move towards more frontend dialects that
-  // may not be the best idea.
-  return true;
-}
-
-void Dispatchability::walkDispatchableOps(
-    function_ref<void(FuncOp funcOp)> fn) {
-  for (auto funcOp : funcClones_) {
-    fn(funcOp.second);
-  }
-}
-
-bool Dispatchability::isDispatchable(StringRef funcName) {
-  return funcDispatchability_[funcName];
-}
-
-bool Dispatchability::isDispatchable(FuncOp funcOp) {
-  return isDispatchable(funcOp.getName());
-}
-
-bool Dispatchability::isInvalidated(
-    const AnalysisManager::PreservedAnalyses &pa) {
-  return false;
-}
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h
deleted file mode 100644
index bfbb4f9..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
-#define IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
-
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/AnalysisManager.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/LogicalResult.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Analyzes functions in a module to determine whether they can be performed as
-// part of a dispatch operation. Functions must meet a set of criteria defining
-// "dispatchability" such as the lack of side effects.
-class Dispatchability {
- public:
-  // Annotates the IR with the dispatchability information. This is only
-  // required if the dispatchability information is interesting to persist
-  // beyond transformation, such as in tests.
-  static LogicalResult annotateIR(ModuleOp moduleOp);
-
-  Dispatchability() = default;
-  explicit Dispatchability(Operation *op) {
-    (void)recalculate(cast<ModuleOp>(op));
-  }
-  Dispatchability(Dispatchability &&) = default;
-  Dispatchability &operator=(Dispatchability &&) = default;
-  Dispatchability(const Dispatchability &) = delete;
-  Dispatchability &operator=(const Dispatchability &) = delete;
-
-  // Recalculates the dispatchability information for the given module.
-  LogicalResult recalculate(ModuleOp moduleOp);
-
-  // Calls |fn| for each dispatchable function.
-  void walkDispatchableOps(function_ref<void(FuncOp funcOp)> fn);
-
-  // Returns true if |funcOp| is dispatchable.
-  bool isDispatchable(StringRef funcName);
-  bool isDispatchable(FuncOp funcOp);
-  bool isInvalidated(const AnalysisManager::PreservedAnalyses &pa);
-
- private:
-  // Returns true if the given function is dispatch compatible.
-  // Returns None if the dispatchability can't yet be calculated as dependent
-  // functions have not been processed.
-  Optional<bool> computeDispatchability(FuncOp funcOp);
-
-  DenseMap<StringRef, bool> funcDispatchability_;
-  OwningModuleRef funcCloneModuleOp_;
-  DenseMap<StringRef, FuncOp> funcClones_;
-};
-
-}  // namespace iree_compiler
-}  // namespace mlir
-
-#endif  // IREE_COMPILER_DIALECT_FLOW_ANALYSIS_DISPATCHABILITY_H_
diff --git a/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp b/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp
deleted file mode 100644
index a25b9b1..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/DispatchabilityTest.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-class DispatchabilityTestPass
-    : public PassWrapper<DispatchabilityTestPass, OperationPass<ModuleOp>> {
- public:
-  void runOnOperation() override {
-    if (failed(Dispatchability::annotateIR(getOperation()))) {
-      signalPassFailure();
-    }
-  }
-};
-
-namespace IREE {
-namespace Flow {
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityTestPass() {
-  return std::make_unique<DispatchabilityTestPass>();
-}
-}  // namespace Flow
-}  // namespace IREE
-
-static PassRegistration<DispatchabilityTestPass> pass(
-    "test-iree-flow-dispatchability",
-    "Test pass used for dispatchability analysis");
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Analysis/TestPasses.h b/iree/compiler/Dialect/Flow/Analysis/TestPasses.h
deleted file mode 100644
index 4e54995..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/TestPasses.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
-#define IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Support/LLVM.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-//===----------------------------------------------------------------------===//
-// Test passes
-//===----------------------------------------------------------------------===//
-
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityTestPass();
-
-//===----------------------------------------------------------------------===//
-// Register all analysis passes
-//===----------------------------------------------------------------------===//
-
-inline void registerFlowAnalysisTestPasses() {
-  createDispatchabilityTestPass();
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
-
-#endif  // IREE_COMPILER_DIALECT_FLOW_ANALYSIS_TESTPASSES_H_
\ No newline at end of file
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/BUILD b/iree/compiler/Dialect/Flow/Analysis/test/BUILD
deleted file mode 100644
index 8f34c74..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/BUILD
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-load("//iree:lit_test.bzl", "iree_lit_test_suite")
-load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-iree_lit_test_suite(
-    name = "lit",
-    srcs = enforce_glob(
-        ["dispatchability.mlir"],
-        include = ["*.mlir"],
-    ),
-    data = [
-        "//iree/tools:IreeFileCheck",
-        "//iree/tools:iree-opt",
-    ],
-)
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt
deleted file mode 100644
index 26aafe0..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/Flow/Analysis/test/BUILD                               #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_lit_test_suite(
-  NAME
-    lit
-  SRCS
-    "dispatchability.mlir"
-  DATA
-    iree::tools::IreeFileCheck
-    iree::tools::iree-opt
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir b/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
deleted file mode 100644
index 9fabb11..0000000
--- a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
+++ /dev/null
@@ -1,92 +0,0 @@
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file -test-iree-flow-dispatchability %s | IreeFileCheck %s
-
-// CHECK-LABEL: @empty
-// CHECK-SAME: dispatchable = true
-func @empty() {
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @customOp
-// CHECK-SAME: dispatchable = false
-func @customOp() {
-  "do.foo"() : () -> ()
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @simpleMath
-// CHECK-SAME: dispatchable = true
-func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @stdElementwiseOps
-// CHECK-SAME: dispatchable = true
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = addf %arg0, %arg0 : tensor<4xf32>
-  %1 = subf %0, %arg0 : tensor<4xf32>
-  %2 = mulf %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @hloElementwiseOps
-// CHECK-SAME: dispatchable = true
-func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @interleavedDot
-// CHECK-SAME: dispatchable = false
-func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
-  return %2 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @caller
-// CHECK-SAME: dispatchable = true
-func @caller(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-// CHECK-LABEL: func @callee
-// CHECK-SAME: dispatchable = true
-func @callee(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @dotCaller
-// CHECK-SAME: dispatchable = false
-func @dotCaller(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = call @dotCallee(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
-  return %2 : tensor<4x4xf32>
-}
-// CHECK-LABEL: func @dotCallee
-// CHECK-SAME: dispatchable = false
-func @dotCallee(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = "mhlo.dot"(%arg0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  return %0 : tensor<4x4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index e1e3d54..8c0a10a 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -350,11 +350,6 @@
 // Dispatch ops
 //===----------------------------------------------------------------------===//
 
-void DispatchRegionOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &results, MLIRContext *context) {
-  results.insert<ClosureOptimizationPattern<DispatchRegionOp>>(context);
-}
-
 void DispatchWorkgroupsOp::getCanonicalizationPatterns(
     OwningRewritePatternList &results, MLIRContext *context) {
   results.insert<ClosureOptimizationPattern<DispatchWorkgroupsOp>>(context);
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h b/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
index 2e2da8a..f38170b 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
+++ b/iree/compiler/Dialect/Flow/IR/FlowOpUtils.h
@@ -53,13 +53,6 @@
 // performed on the op. Returns true if the op was optimized.
 bool optimizeClosureLikeOp(ClosureOpInterface &closureOp,
                            PatternRewriter *rewriter = nullptr);
-template <typename T>
-inline bool optimizeClosureOp(T &op, PatternRewriter *rewriter = nullptr) {
-  auto closureOp = cast<ClosureOpInterface>(op.getOperation());
-  bool didOptimize = optimizeClosureLikeOp(closureOp, rewriter);
-  op = dyn_cast_or_null<DispatchRegionOp>(closureOp.getOperation());
-  return didOptimize;
-}
 
 // A pattern that optimizes the given region-containing op T (CSE, DCE, etc).
 // Duplicate operands will be combined and unused operands and results will be
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index 51f409e..6fb8e8f 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -561,316 +561,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// flow.dispatch.region
-//===----------------------------------------------------------------------===//
-
-/// Inlines operation |op| into the |dispatchRegionOp| by making all operands,
-/// as well as values caputred implicitly by the regions of the operation, that
-/// are outside the dispatch region operands of the dispatch region as well.
-static Operation *inlineOpIntoDispatchRegion(OpBuilder &builder,
-                                             DispatchRegionOp dispatchRegionOp,
-                                             Operation *op,
-                                             BlockAndValueMapping &map) {
-  llvm::SetVector<Value> capturedInputs(op->getOperands().begin(),
-                                        op->getOperands().end());
-  getUsedValuesDefinedAbove(op->getRegions(), capturedInputs);
-  Block *block = builder.getInsertionBlock();
-  for (Value capturedInput : capturedInputs) {
-    if (map.contains(capturedInput)) continue;
-    dispatchRegionOp.getOperation()->insertOperands(
-        dispatchRegionOp.getOperation()->getNumOperands(), {capturedInput});
-    Value newBlockArgument = block->addArgument(capturedInput.getType());
-    map.map(capturedInput, newBlockArgument);
-  }
-
-  return builder.clone(*op, map);
-}
-
-llvm::Optional<std::pair<DispatchRegionOp, Operation *>>
-DispatchRegionOp::formFromAnchorOp(Value workload, Operation *anchorOp,
-                                   OpBuilder &builder) {
-  builder.setInsertionPoint(anchorOp);
-  auto loc = anchorOp->getLoc();
-  // Map anchor into new dispatch region.
-  auto drOp = builder.create<DispatchRegionOp>(
-      loc, llvm::to_vector<1>(anchorOp->getResultTypes()), workload,
-      ArrayRef<Value>());
-  auto *drBlock = new Block();
-  drOp.body().push_back(drBlock);
-  BlockAndValueMapping mapping;
-  builder.setInsertionPointToEnd(drBlock);
-  Operation *newAnchorOp =
-      inlineOpIntoDispatchRegion(builder, drOp, anchorOp, mapping);
-
-  // Insert terminator
-  builder.create<IREE::Flow::ReturnOp>(loc, newAnchorOp->getResults());
-
-  // Replace anchor uses with region result.
-  for (auto it : llvm::enumerate(anchorOp->getResults())) {
-    it.value().replaceAllUsesWith(drOp.getResult(it.index()));
-  }
-  anchorOp->erase();
-  return std::make_pair(drOp, newAnchorOp);
-}
-
-// Clones an operation with new result types.
-// The original operation will be erased and a new operation constructed
-// in its place.
-static Operation *cloneWithNewResultTypes(Operation *op,
-                                          TypeRange newResultTypes) {
-  OperationState state(op->getLoc(), op->getName());
-  state.addOperands(op->getOperands());
-  state.addTypes(newResultTypes);
-  state.addSuccessors(op->getSuccessors());
-  state.addAttributes(op->getAttrs());
-  for (unsigned i = 0, e = op->getNumRegions(); i < e; ++i) {
-    state.addRegion();
-  }
-  Operation *newOp = Operation::create(state);
-  for (unsigned i = 0, e = op->getNumRegions(); i < e; ++i) {
-    newOp->getRegion(i).takeBody(op->getRegion(i));
-  }
-  return newOp;
-}
-
-ResultRange DispatchRegionOp::appendResults(DispatchRegionOp &self,
-                                            ValueRange addlResults,
-                                            OpBuilder &builder) {
-  Block &block = self.body().front();
-
-  unsigned origNumResults = self.getNumResults();
-  llvm::SmallVector<Type, 4> newTypes(self.getResultTypes().begin(),
-                                      self.getResultTypes().end());
-  for (auto r : addlResults) newTypes.push_back(r.getType());
-
-  // Changing the arity of the results requires replacing the dispatch region.
-  builder.setInsertionPoint(self);
-  auto newDrOp = llvm::cast<DispatchRegionOp>(
-      builder.insert(cloneWithNewResultTypes(self, newTypes)));
-  self.replaceAllUsesWith(newDrOp->getResults().take_front(origNumResults));
-  self.erase();
-  self = newDrOp;
-
-  // Add results to the terminator.
-  auto terminator = block.getTerminator();
-  llvm::SmallVector<Value, 4> returns(terminator->getOperands());
-  returns.append(addlResults.begin(), addlResults.end());
-  terminator->setOperands(returns);
-
-  return self->getResults().slice(origNumResults, addlResults.size());
-}
-
-Operation *DispatchRegionOp::inlineOp(Operation *origOp, OpBuilder &builder,
-                                      bool positionAtEnd) {
-  Block &block = body().front();
-  if (positionAtEnd) {
-    builder.setInsertionPoint(block.getTerminator());
-  } else {
-    builder.setInsertionPointToStart(&block);
-  }
-  // Map existing dr args.
-  BlockAndValueMapping mapping;
-  for (unsigned i = 0, e = block.getNumArguments(); i < e; ++i) {
-    mapping.map(args()[i], block.getArgument(i));
-  }
-
-  // Also map any terminator operands to support inlining at the end.
-  for (auto it : llvm::enumerate(block.getTerminator()->getOperands())) {
-    mapping.map(getResult(it.index()), it.value());
-  }
-
-  // Remember the values corresponding to original op results.
-  llvm::SmallVector<Value, 4> origOpResultValues;
-  for (Value result : origOp->getResults()) {
-    origOpResultValues.push_back(mapping.lookupOrNull(result));
-  }
-
-  Operation *inlinedOp =
-      inlineOpIntoDispatchRegion(builder, *this, origOp, mapping);
-
-  // Replace any results from the orig with results from the clone.
-  for (unsigned i = 0, e = origOp->getNumResults(); i < e; ++i) {
-    Value resultTo = origOpResultValues[i];
-    if (resultTo) {
-      resultTo.replaceAllUsesWith(inlinedOp->getResult(i));
-    }
-  }
-
-  return inlinedOp;
-}
-
-void DispatchRegionOp::build(OpBuilder &builder, OperationState &state,
-                             ArrayRef<Type> resultTypes, Value workload,
-                             ValueRange args,
-                             ArrayRef<NamedAttribute> attributes) {
-  state.addTypes(resultTypes);
-  state.addOperands({workload});
-  state.addOperands(args);
-  state.addAttributes(attributes);
-  state.addRegion();
-}
-
-ParseResult parseDispatchRegionOp(OpAsmParser &parser, OperationState *result) {
-  // Parse required workload.
-  OpAsmParser::OperandType workloadArg;
-  Type workloadArgType;
-  if (failed(parser.parseLSquare()) ||
-      failed(parser.parseOperand(workloadArg)) ||
-      failed(parser.parseColonType(workloadArgType)) ||
-      failed(parser.parseRSquare()) ||
-      failed(parser.resolveOperand(workloadArg, workloadArgType,
-                                   result->operands))) {
-    return failure();
-  }
-
-  // Parse (optional) args.
-  SmallVector<OpAsmParser::OperandType, 16> regionArgs;
-  SmallVector<Type, 16> regionArgTypes;
-  if (failed(parser.parseLParen())) {
-    return failure();
-  }
-  if (failed(parser.parseOptionalRParen())) {
-    SmallVector<OpAsmParser::OperandType, 16> regionOperands;
-    auto argsLoc = parser.getCurrentLocation();
-    do {
-      // Reserve entries in the lists.
-      regionArgs.emplace_back();
-      regionOperands.emplace_back();
-      regionArgTypes.emplace_back();
-      if (failed(parser.parseRegionArgument(regionArgs.back())) ||
-          failed(parser.parseEqual()) ||
-          failed(parser.parseOperand(regionOperands.back())) ||
-          failed(parser.parseColonType(regionArgTypes.back()))) {
-        return failure();
-      }
-    } while (succeeded(parser.parseOptionalComma()));
-    if (failed(parser.parseRParen()) ||
-        failed(parser.resolveOperands(regionOperands, regionArgTypes, argsLoc,
-                                      result->operands))) {
-      return failure();
-    }
-  }
-
-  // Parse (optional) results.
-  if (failed(parser.parseOptionalArrowTypeList(result->types))) {
-    return failure();
-  }
-
-  // Parse region body.
-  Region *body = result->addRegion();
-  if (failed(parser.parseRegion(*body, regionArgs, regionArgTypes)) ||
-      failed(parser.parseOptionalAttrDict(result->attributes))) {
-    return failure();
-  }
-  return success();
-}
-
-void printDispatchRegionOp(OpAsmPrinter &p, DispatchRegionOp op) {
-  p << op.getOperationName();
-
-  // Print the workload argument.
-  p << "[";
-  p.printOperand(op.workload());
-  p << " : ";
-  p.printType(op.workload().getType());
-  p << "]";
-
-  // Print the data argument remapping.
-  p << "(";
-  interleaveComma(llvm::zip(op.body().getArguments(), op.args()), p,
-                  [&](std::tuple<BlockArgument, Value> it) {
-                    p << std::get<0>(it) << " = " << std::get<1>(it);
-                    p << " : ";
-                    p << std::get<1>(it).getType();
-                  });
-  p << ")";
-
-  // Print the result types, if any.
-  if (op.getNumResults() > 0) {
-    p << " -> (";
-    interleaveComma(op.getResultTypes(), p);
-    p << ")";
-  }
-
-  p.printRegion(op.body(), /*printEntryBlockArgs=*/false);
-  p.printOptionalAttrDict(op->getAttrs(),
-                          /*elidedAttrs=*/{});
-}
-
-Operation::operand_range DispatchRegionOp::getClosureOperands() {
-  return args();
-}
-
-Operation::result_range DispatchRegionOp::getClosureResults() {
-  return results();
-}
-
-// TODO(#4897): allow non-splat constants - current paths can't handle them.
-static bool canDispatchRegionContainOpIssue4897(Operation *op) {
-  if (auto constantOp = dyn_cast<ConstantOp>(op)) {
-    auto constantValueAttr = constantOp.getValue();
-    auto constantType = constantOp.getType();
-    if (constantValueAttr.isa<SplatElementsAttr>()) {
-      return true;
-    } else if (auto denseAttr =
-                   constantValueAttr.dyn_cast<DenseElementsAttr>()) {
-      return denseAttr.isSplat();
-    } else if (constantType.isIntOrIndexOrFloat()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// Inline operations that the dispatch region can handle natively.
-static bool canDispatchRegionContainOp(Operation *op) {
-  // Inline constant operations that are splat or small constants.
-  if (auto constantOp = dyn_cast<ConstantOp>(op)) {
-    auto constantValueAttr = constantOp.getValue();
-    auto constantType = constantOp.getType();
-    if (constantValueAttr.isa<SplatElementsAttr>()) {
-      return true;
-    } else if (auto denseAttr =
-                   constantValueAttr.dyn_cast<DenseElementsAttr>()) {
-      // TODO(GH-4897): Non-splat constants seems to have an issue on the LLLVM
-      // side. Uncomment after that is fixed.
-      auto shapedType = constantOp.getType().cast<ShapedType>();
-      uint64_t estimatedByteLength =
-          (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) /
-          8;
-      return denseAttr.isSplat() ||
-             estimatedByteLength <= clInlineConstantByteLength;
-    } else if (constantType.isIntOrIndexOrFloat()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool DispatchRegionOp::canClosureContainOp(Operation *op) {
-  return canDispatchRegionContainOpIssue4897(op);
-}
-
-ClosureOpInterface
-DispatchRegionOp::cloneReplacementExcludingOperandsAndResults(
-    ArrayRef<unsigned> excludedOperandIndices,
-    ArrayRef<unsigned> excludedResultIndices) {
-  SmallVector<Type, 4> newResultTypes = llvm::to_vector<4>(getResultTypes());
-  SmallVector<Value, 4> newOperandsValues = llvm::to_vector<4>(args());
-  excludeClosureOperandsAndResults(newOperandsValues, excludedOperandIndices,
-                                   newResultTypes, excludedResultIndices);
-  auto newOp = OpBuilder(getContext())
-                   .create<DispatchRegionOp>(getLoc(), newResultTypes,
-                                             workload(), newOperandsValues,
-                                             getOperation()->getAttrs());
-  auto &newBody = newOp.getClosureBodyRegion();
-  newBody.takeBody(getClosureBodyRegion());
-  eraseRegionResults(newBody, excludedResultIndices);
-  newBody.front().eraseArguments(excludedOperandIndices);
-  return newOp;
-}
-
-//===----------------------------------------------------------------------===//
 // flow.dispatch.tensor.load
 //===----------------------------------------------------------------------===//
 
@@ -1092,6 +782,31 @@
   return results();
 }
 
+// Inline operations that the dispatch region can handle natively.
+static bool canDispatchRegionContainOp(Operation *op) {
+  // Inline constant operations that are splat or small constants.
+  if (auto constantOp = dyn_cast<ConstantOp>(op)) {
+    auto constantValueAttr = constantOp.getValue();
+    auto constantType = constantOp.getType();
+    if (constantValueAttr.isa<SplatElementsAttr>()) {
+      return true;
+    } else if (auto denseAttr =
+                   constantValueAttr.dyn_cast<DenseElementsAttr>()) {
+      // TODO(GH-4897): Non-splat constants seem to have an issue on the LLVM
+      // side. Uncomment after that is fixed.
+      auto shapedType = constantOp.getType().cast<ShapedType>();
+      uint64_t estimatedByteLength =
+          (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) /
+          8;
+      return denseAttr.isSplat() ||
+             estimatedByteLength <= clInlineConstantByteLength;
+    } else if (constantType.isIntOrIndexOrFloat()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool DispatchWorkgroupsOp::canClosureContainOp(Operation *op) {
   return canDispatchRegionContainOp(op);
 }
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td
index d946a36..7910ae4 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -174,76 +174,6 @@
 // Partitioned regions
 //===----------------------------------------------------------------------===//
 
-def FLOW_DispatchRegionOp : FLOW_PureOp<"dispatch.region", [
-    IsolatedFromAbove,
-    DeclareOpInterfaceMethods<FLOW_ClosureOpInterface>,
-  ]> {
-  let summary = [{partitioned region representing a dispatched workload}];
-  let description = [{
-    A closure that represents a functional dispatch unit. These perform
-    computations in a way that can be lowered to target executable formats such
-    as SPIR-V for execution.
-
-    Ops that are identified as "dispatchable" are grouped into dispatch regions
-    and compatible dispatch regions are folded together. What remains outside of
-    the dispatch regions is the glue required to schedule the work (commonly
-    referred to as "host" code, even if it doesn't run on an AP).
-
-    Dispatch regions are modeled using value semantics: it is assumed that all
-    arguments are read-only and that the dispatch regions themselves have no
-    side-effects.
-  }];
-
-  let arguments = (ins
-    FLOW_Workload:$workload,
-    Variadic<AnyType>:$args
-  );
-  let results = (outs
-    Variadic<AnyType>:$results
-  );
-
-  let regions = (region AnyRegion:$body);
-
-  let extraClassDeclaration = [{
-    /// Forms a dispatch region around a given anchor operation, returning
-    /// the new DispatchRegionOp and anchor operation within the region.
-    /// Returns llvm::None on failure.
-    /// The insertion point of the OpBuilder will be modified.
-    static llvm::Optional<std::pair<DispatchRegionOp, Operation *>>
-        formFromAnchorOp(Value workload, Operation *anchorOp,
-                         OpBuilder &builder);
-
-    // Appends results to the dispatch region. This will re-allocate the
-    // DispatchRegionOp itself but preserve the contained body block.
-    // Returns a ResultRange for the new dispatch region op's results
-    // corresponding to addlResults.
-    static ResultRange appendResults(
-        DispatchRegionOp &self, ValueRange addlResults, OpBuilder &builder);
-
-    /// Returns the index of the args() operand in the Operation operands list.
-    unsigned mapArgOperandToOpOperand(unsigned i) { return i + 1; }
-
-    /// Inlines an op into the dispatch region.
-    /// By default, this will inline the op at the beginning of the region.
-    /// Set positionAtEnd=true to inline at the end. This is not a general
-    /// IR splicing helper: it can only inline ops with inputs that map to
-    /// either captured operands or results and is used to coelesce an op
-    /// into an adjacent dispatch region.
-    /// Note that the original op is cloned but not erased. It is up to the
-    /// caller to cleanup the original op as needed.
-    Operation *inlineOp(Operation *origOp, OpBuilder &builder,
-        bool positionAtEnd=false);
-  }];
-
-  let skipDefaultBuilders = 1;
-  let builders = [
-    OpBuilder<(ins "ArrayRef<Type>":$resultTypes, "Value":$workload,
-      "ValueRange":$args, CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
-  ];
-
-  let hasCanonicalizer = 1;
-}
-
 def FLOW_DispatchWorkgroupsOp : FLOW_PureOp<"dispatch.workgroups", [
     IsolatedFromAbove,
     AttrSizedOperandSegments,
diff --git a/iree/compiler/Dialect/Flow/IR/test/BUILD b/iree/compiler/Dialect/Flow/IR/test/BUILD
index e7c420a..997322b 100644
--- a/iree/compiler/Dialect/Flow/IR/test/BUILD
+++ b/iree/compiler/Dialect/Flow/IR/test/BUILD
@@ -26,8 +26,6 @@
     srcs = enforce_glob(
         [
             "dispatch_ops.mlir",
-            "dispatch_region_folding.mlir",
-            "dispatch_regions.mlir",
             "dispatch_tensor_folding.mlir",
             "dispatch_workgroups.mlir",
             "dispatch_workgroups_folding.mlir",
diff --git a/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
index 0c2f8a3..7ad3dd3 100644
--- a/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/IR/test/CMakeLists.txt
@@ -15,8 +15,6 @@
     lit
   SRCS
     "dispatch_ops.mlir"
-    "dispatch_region_folding.mlir"
-    "dispatch_regions.mlir"
     "dispatch_tensor_folding.mlir"
     "dispatch_workgroups.mlir"
     "dispatch_workgroups_folding.mlir"
diff --git a/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir
deleted file mode 100644
index fb678b8..0000000
--- a/iree/compiler/Dialect/Flow/IR/test/dispatch_region_folding.mlir
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: iree-opt -split-input-file -canonicalize %s | iree-opt -split-input-file | IreeFileCheck %s
-
-// CHECK-LABEL: @dceOperandsAndResults
-func @dceOperandsAndResults(%arg0 : tensor<?xf32>) -> (tensor<?xf32>) {
-  // CHECK: %[[WORKLOAD:.+]] = constant 5
-  %workload = constant 5 : index
-  // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD]] : index]
-  // CHECK-SAME: (%[[CA1:.+]] = %arg0 : tensor<?xf32>) -> (tensor<?xf32>)
-  // CHECK: %[[DR0:.+]] = addf %[[CA1]], %[[CA1]]
-  // CHECK: flow.return %[[DR0]] : tensor<?xf32>
-  %ret0, %ret1 = flow.dispatch.region[%workload : index](
-      %i0 = %arg0 : tensor<?xf32>, %i1 = %arg0 : tensor<?xf32>, %i2 = %arg0 : tensor<?xf32>)
-      -> (tensor<?xf32>, tensor<?xf32>) {
-    %1 = addf %i0, %i1 : tensor<?xf32>
-    flow.return %1, %i2 : tensor<?xf32>, tensor<?xf32>
-  }
-  // CHECK: return %[[R0]] : tensor<?xf32>
-  return %ret0 : tensor<?xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir b/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir
deleted file mode 100644
index d06d20c..0000000
--- a/iree/compiler/Dialect/Flow/IR/test/dispatch_regions.mlir
+++ /dev/null
@@ -1,65 +0,0 @@
-// Tests printing and parsing of dispatch region ops.
-
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s | iree-opt -allow-unregistered-dialect -split-input-file | IreeFileCheck %s
-
-// CHECK-LABEL: @singleArg
-func @singleArg(%arg0 : tensor<?xf32>) {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
-  // CHECK-NEXT: flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) {
-  // CHECK-NEXT:   flow.return
-  // CHECK-NEXT: }
-  %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
-  flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) {
-    flow.return
-  }
-  // CHECK-NEXT: return
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @multipleArgs
-func @multipleArgs(%arg0 : tensor<?xf32>, %arg1 : tensor<?xf32>) {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
-  // CHECK-NEXT: flow.dispatch.region[%[[WORKLOAD]] : index](%arg2 = %arg0 : tensor<?xf32>, %arg3 = %arg1 : tensor<?xf32>) {
-  // CHECK-NEXT:   flow.return
-  // CHECK-NEXT: }
-  %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
-  flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>, %i1 = %arg1 : tensor<?xf32>) {
-    flow.return
-  }
-  // CHECK-NEXT: return
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @singleResult
-func @singleResult(%arg0 : tensor<?xf32>) -> tensor<?xf32> {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
-  // CHECK-NEXT: %1 = flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>) {
-  // CHECK-NEXT:   flow.return %arg1 : tensor<?xf32>
-  // CHECK-NEXT: }
-  %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
-  %ret0 = flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) -> tensor<?xf32> {
-    flow.return %i0 : tensor<?xf32>
-  }
-  // CHECK-NEXT: return %1 : tensor<?xf32>
-  return %ret0 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @multipleResults
-func @multipleResults(%arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = "some.shape"
-  // CHECK-NEXT: %1:2 = flow.dispatch.region[%[[WORKLOAD]] : index](%arg1 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
-  // CHECK-NEXT:   flow.return %arg1, %arg1 : tensor<?xf32>, tensor<?xf32>
-  // CHECK-NEXT: }
-  %workload = "some.shape"(%arg0) : (tensor<?xf32>) -> index
-  %ret0, %ret1 = flow.dispatch.region[%workload : index](%i0 = %arg0 : tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
-    flow.return %i0, %i0 : tensor<?xf32>, tensor<?xf32>
-  }
-  // CHECK-NEXT: return %1#0, %1#1 : tensor<?xf32>, tensor<?xf32>
-  return %ret0, %ret1 : tensor<?xf32>, tensor<?xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/BUILD b/iree/compiler/Dialect/Flow/Transforms/BUILD
index 8d42e05..9b75f91 100644
--- a/iree/compiler/Dialect/Flow/Transforms/BUILD
+++ b/iree/compiler/Dialect/Flow/Transforms/BUILD
@@ -42,19 +42,14 @@
         "ConvertToFlowTensorOps.cpp",
         "DeduplicateExecutables.cpp",
         "DestructiveUpdateUtils.cpp",
-        "DispatchConfig.cpp",
         "DispatchLinalgOnTensors.cpp",
-        "DispatchabilityAnalysis.cpp",
         "ExpandVariableDynamicDims.cpp",
         "ExportBenchmarkFuncs.cpp",
-        "FoldCompatibleDispatchRegions.cpp",
         "FormStreams.cpp",
         "HLOToHLOPreprocessing.cpp",
         "HoistUnstreamableOps.cpp",
-        "IdentifyDispatchRegions2.cpp",
         "InjectDispatchTracing.cpp",
         "LegalizeInputTypes.cpp",
-        "OutlineDispatchRegions.cpp",
         "OutlineDispatchRegions2.cpp",
         "OutlineLargeConstants.cpp",
         "PassDetail.h",
@@ -64,7 +59,6 @@
     ],
     hdrs = [
         "DestructiveUpdateUtils.h",
-        "DispatchConfig.h",
         "Passes.h",
         "Passes.h.inc",
     ],
@@ -73,7 +67,6 @@
         "//iree/compiler/Conversion/HLOToHLO",
         "//iree/compiler/Conversion/HLOToLinalg:HLOToLinalgOnTensors",
         "//iree/compiler/Conversion/LinalgToLinalg",
-        "//iree/compiler/Dialect/Flow/Analysis",
         "//iree/compiler/Dialect/Flow/Conversion",
         "//iree/compiler/Dialect/Flow/Conversion/HLOToFlow",
         "//iree/compiler/Dialect/Flow/Conversion/StandardToFlow",
diff --git a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
index b5c489c..5fbe0ea 100644
--- a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
@@ -24,26 +24,20 @@
     Transforms
   HDRS
     "DestructiveUpdateUtils.h"
-    "DispatchConfig.h"
     "Passes.h"
     "Passes.h.inc"
   SRCS
     "ConvertToFlowTensorOps.cpp"
     "DeduplicateExecutables.cpp"
     "DestructiveUpdateUtils.cpp"
-    "DispatchConfig.cpp"
     "DispatchLinalgOnTensors.cpp"
-    "DispatchabilityAnalysis.cpp"
     "ExpandVariableDynamicDims.cpp"
     "ExportBenchmarkFuncs.cpp"
-    "FoldCompatibleDispatchRegions.cpp"
     "FormStreams.cpp"
     "HLOToHLOPreprocessing.cpp"
     "HoistUnstreamableOps.cpp"
-    "IdentifyDispatchRegions2.cpp"
     "InjectDispatchTracing.cpp"
     "LegalizeInputTypes.cpp"
-    "OutlineDispatchRegions.cpp"
     "OutlineDispatchRegions2.cpp"
     "OutlineLargeConstants.cpp"
     "PassDetail.h"
@@ -72,7 +66,6 @@
     iree::compiler::Conversion::HLOToHLO
     iree::compiler::Conversion::HLOToLinalg::HLOToLinalgOnTensors
     iree::compiler::Conversion::LinalgToLinalg
-    iree::compiler::Dialect::Flow::Analysis
     iree::compiler::Dialect::Flow::Conversion
     iree::compiler::Dialect::Flow::Conversion::HLOToFlow
     iree::compiler::Dialect::Flow::Conversion::StandardToFlow
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
deleted file mode 100644
index bd315a6..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-
-#define DEBUG_TYPE "iree-detail"
-
-static llvm::cl::opt<bool> clEnableConsumerOnlyFusion(
-    "iree-enable-consumer-only-fusion",
-    llvm::cl::desc("Flag to enable fusion of matmul, etc. with its consumers, "
-                   "experimental flag to evaluate fusion"),
-    llvm::cl::init(false));
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-// TODO(laurenzo): Every one of these should have better support and be removed
-// from this exclusion list eventually.
-// Allowlist of ops that materialize to an index-permuted copy of some kind
-// if they exist standalone. Generally we try to avoid anchoring on these,
-// letting them fuse into more meaningful ops where possible.
-bool isIndexOp(Operation *op) {
-  // TODO(laurenzo): Curate this list more specifically (or have a better
-  // mechanism for determining).
-  return isa<Shape::RankedBroadcastInDimOp>(op) ||
-         isa<mhlo::BroadcastInDimOp>(op) || isa<mhlo::BroadcastOp>(op) ||
-         isa<mhlo::DynamicBroadcastInDimOp>(op) ||
-         isa<mhlo::DynamicReshapeOp>(op) || isa<mhlo::DynamicSliceOp>(op) ||
-         isa<mhlo::SliceOp>(op) || isa<mhlo::TransposeOp>(op);
-}
-
-/// Returns true if |lhs| and |rhs| return a single value with the same shape.
-bool checkSameOutputShape(Operation *lhs, Operation *rhs) {
-  if (lhs->getNumResults() != 1 || rhs->getNumResults() != 1) {
-    return false;
-  }
-  ShapedType lhsType = lhs->getResults()[0].getType().dyn_cast<ShapedType>();
-  ShapedType rhsType = rhs->getResults()[0].getType().dyn_cast<ShapedType>();
-  if (!lhsType || !rhsType || lhsType != rhsType) {
-    return false;
-  }
-  // The static shapes match, but if one of the ops is a transpose the runtime
-  // shapes might still differ, especially in the dynamic case where dimensions
-  // are only known at runtime.
-  if (isa<mhlo::TransposeOp>(lhs) || isa<mhlo::TransposeOp>(rhs)) {
-    return false;
-  }
-  return true;
-}
-}  // namespace
-
-//------------------------------------------------------------------------------
-// OpDispatchPolicy
-//------------------------------------------------------------------------------
-
-bool OpDispatchPolicy::isDispatchable(Operation *op) {
-  if (FlowDialect::isDialectOp(op)) {
-    // Ignore things we've already produced as they should only relate to
-    // sequencer operations.
-    LLVM_DEBUG(llvm::dbgs() << "  NOT DISPATCHABLE (Flow Dialect): "
-                            << op->getName() << "\n");
-    return false;
-  } else if (op->hasTrait<OpTrait::IsTerminator>()) {
-    // Currently we skip all terminators as we want to leave them in the block
-    // to keep it valid. Future folding passes may take care of them if they are
-    // worth bringing into the dispatch region.
-    LLVM_DEBUG(llvm::dbgs() << "  NOT DISPATCHABLE (Known Terminator): "
-                            << op->getName() << "\n");
-    return false;
-  } else if (auto callOp = dyn_cast<CallOp>(op)) {
-    bool dispatchable = dispatchability.isDispatchable(callOp.getCallee());
-    LLVM_DEBUG(llvm::dbgs()
-               << "  " << (dispatchable ? "" : "NOT ")
-               << "DISPATCHABLE (Call): " << op->getName() << "\n");
-    return dispatchable;
-  } else if (isa<CallIndirectOp>(op)) {
-    // Indirect calls are not supported in dispatch code.
-    LLVM_DEBUG(llvm::dbgs() << "  NOT DISPATCHABLE (Call Indirect): "
-                            << op->getName() << "\n");
-    return false;
-  } else if (isa<ConstantOp>(op)) {
-    // Constants are handled in the RematerializeDispatchConstants pass.
-    // We do that independently so that we can more easily see the use of
-    // constants across all dispatches instead of just on an individual basis
-    // as we do here.
-    LLVM_DEBUG(llvm::dbgs()
-               << "  NOT DISPATCHABLE (Constant): " << op->getName() << "\n");
-    return false;
-  } else if (op->getNumResults() &&
-             !op->getResult(0).getType().isa<ShapedType>()) {
-    // We don't put scalar manipulation into dispatch regions.
-    LLVM_DEBUG(llvm::dbgs()
-               << "  NOT DISPATCHABLE (Non Shaped): " << op->getName() << "\n");
-    return false;
-  } else if (!isOpOfKnownDialect(op)) {
-    // Probably a custom op.
-    LLVM_DEBUG(llvm::dbgs() << "  NOT DISPATCHABLE (Unknown Dialect): "
-                            << op->getName() << "\n");
-    return false;
-  }
-  LLVM_DEBUG(llvm::dbgs() << "  DISPATCHABLE: " << op->getName() << "\n");
-  return true;
-}
-
-bool OpDispatchPolicy::isIdentityMetadata(Operation *op) {
-  return isa<linalg::InitTensorOp, Shape::TieShapeOp, Shape::MakeRankedShapeOp>(
-      op);
-}
-
-bool OpDispatchPolicy::isViewModificationOp(Operation *op) {
-  return isa<mhlo::ReshapeOp, linalg::TensorReshapeOp>(op);
-}
-
-int OpDispatchPolicy::getAnchorBenefit(Operation *op) {
-  if (isUnsupportedFusionOp(op) || isFusableWithConsumersOnly(op)) {
-    return 100;
-  }
-
-  if (isIdentityMetadata(op)) {
-    // Cannot anchor.
-    return 0;
-  } else if (isViewModificationOp(op)) {
-    return 1;
-  } else if (isIndexOp(op)) {
-    // We generally do not want to form anchors around ops that just do a copy
-    // (perhaps with an affine map) except as a last resort.
-    return 5;
-  } else if (isa<mhlo::SelectOp>(op)) {
-    // TODO(#2050): In a number of cases, this makes it less likely to split
-    // a DR across a compare/select boundary. Remove this once i1 is legalized
-    // properly.
-    return 15;
-  } else {
-    // Most dispatchable ops can anchor but are a fairly low benefit.
-    return 10;
-  }
-}
-
-OpDispatchPolicy::FusionType OpDispatchPolicy::fuseInput(Operation *anchorOp,
-                                                         Operation *inputOp) {
-  if (inputOp->hasTrait<OpTrait::IsTerminator>()) return FusionType::DISABLED;
-
-  if (isIdentityMetadata(inputOp) || isViewModificationOp(inputOp)) {
-    // Shape ties must always be duplicated into the region and remain in their
-    // original position. This should apply to any such "metadata" ops.
-    return FusionType::CLONE_INTO;
-  }
-  if (isUnsupportedFusionOp(anchorOp) || isUnsupportedFusionOp(inputOp)) {
-    return FusionType::DISABLED;
-  }
-  if (isFusableWithConsumersOnly(anchorOp)) {
-    return FusionType::DISABLED;
-  }
-
-  // By default for operands, they are duplicated into the dispatch region.
-  // Typically at the initial fusion stage, there is not a sufficient cost
-  // model to determine whether it is more beneficial to fuse or materialize,
-  // so the bias is towards fusion and leaving inter-region analysis to a later
-  // phase.
-  return FusionType::CLONE_INTO;
-}
-
-OpDispatchPolicy::FusionType OpDispatchPolicy::fuseOutput(Operation *anchorOp,
-                                                          Operation *outputOp) {
-  if (outputOp->hasTrait<OpTrait::IsTerminator>() ||
-      outputOp->getNumResults() == 0) {
-    return FusionType::DISABLED;
-  }
-  if (isIdentityMetadata(outputOp) || isViewModificationOp(outputOp)) {
-    return FusionType::MOVE_INTO;
-  }
-
-  if (isUnsupportedFusionOp(anchorOp) || isUnsupportedFusionOp(outputOp)) {
-    return FusionType::DISABLED;
-  }
-  if (isFusableWithConsumersOnly(anchorOp) &&
-      !isFusableWithConsumersOnly(outputOp)) {
-    if (isFusableWithConsumerOfSameOutputShapeOnly(anchorOp) &&
-        !checkSameOutputShape(anchorOp, outputOp)) {
-      return FusionType::DISABLED;
-    }
-    return FusionType::MOVE_INTO;
-  }
-
-  // Generally, it is hard to reason locally about the legality of fusing an
-  // output, since additional analysis may need to be done to determine
-  // workload compatibility (especially with dynamic shapes involved). As
-  // such, we do as little as possible here and instead rely on optimization
-  // passes to merge compatible regions.
-  return FusionType::DISABLED;
-}
-
-bool OpDispatchPolicy::isFusableWithConsumerOfSameOutputShapeOnly(
-    Operation *op) {
-  return clEnableConsumerOnlyFusion && isa<mhlo::DotOp, mhlo::DotGeneralOp>(op);
-}
-
-bool OpDispatchPolicy::isFusableWithConsumersOnly(Operation *op) {
-  return isFusableWithConsumerOfSameOutputShapeOnly(op);
-}
-
-// TODO(b/144530470): replace with tablegen attributes/interfaces.
-bool OpDispatchPolicy::isUnsupportedFusionOp(Operation *op) {
-  return isa<linalg::IndexedGenericOp, linalg::GenericOp, mhlo::ConcatenateOp,
-             mhlo::ConvOp, mhlo::PadOp, mhlo::ReduceOp, mhlo::ReduceWindowOp,
-             mhlo::SliceOp>(op) ||
-         (!clEnableConsumerOnlyFusion &&
-          isa<mhlo::DotOp, mhlo::DotGeneralOp>(op)) ||
-         isLeafOnlyOp(op);
-}
-
-bool OpDispatchPolicy::isLeafOnlyOp(Operation *op) {
-  return isa<mhlo::TorchIndexSelectOp>(op);
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h
deleted file mode 100644
index 6d82d22..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "mlir/IR/Operation.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-// Queries dispatch options for an operation.
-// This is presently mostly a hard-coded set of heuristics but should expand
-// to be based on both queries of new op interfaces and a cost model.
-class OpDispatchPolicy {
- public:
-  // The benefit that selecting an anchor is expected to provide. Anchors
-  // with higher benefit should be formed first.
-  using AnchorBenefit = int;
-  enum class FusionType {
-    // Fusion is disallowed.
-    DISABLED = 0,
-    // The operation should be duped into the dispatch region.
-    CLONE_INTO = 1,
-    // The operation should be cloned into the dispatch region and its uses
-    // redirected to the dispatch region's results.
-    MOVE_INTO = 3,
-  };
-
-  OpDispatchPolicy(Dispatchability &dispatchability)
-      : dispatchability(dispatchability) {}
-
-  // Returns true if |op| is only fusable with its consumers.
-  static bool isFusableWithConsumersOnly(Operation *op);
-
-  // Returns true if |op| is only fusable with its consumers where consumer has
-  // the same shape for the output.
-  static bool isFusableWithConsumerOfSameOutputShapeOnly(Operation *op);
-
-  // Returns true if |op| can be treated as a view modification, i.e. eventually
-  // the op is lowered to just change the way the underlying buffer holding the
-  // values is viewed w.r.t shape, etc.
-  static bool isViewModificationOp(Operation *op);
-
-  // Returns true if |op| is not able to fuse with either producer or consumer.
-  static bool isUnsupportedFusionOp(Operation *op);
-
-  // Returns true if |op| can only be a leaf op.
-  static bool isLeafOnlyOp(Operation *op);
-
-  // Returns true if the given |op| can be dispatched in all cases.
-  // Other passes may handle special cases of these ops but this initial
-  // identification is conservative.
-  bool isDispatchable(Operation *op);
-
-  // Returns true if the op is an "identity metadata" op that must be preserved
-  // at use-def boundaries. Such ops are non-executable, with >= 1 operands
-  // and one result where the result is assumed to be operand(0) with any
-  // op-specific metadata attached.
-  bool isIdentityMetadata(Operation *op);
-
-  // Returns the benefit of treating the given op as an anchor to form a
-  // dispatch region around, where <= 0 disables the ability of the op to
-  // be an anchor.
-  // Anchors are identified greedily by sorting in descending order of
-  // anchor benefit and ascending topological order (i.e. all ops with the
-  // highest benefit have a dispatch region greedily formed around them
-  // prior to proceeding to ops with the next lowest benefit).
-  //
-  // It is only valid to call this for dispatchable ops.
-  AnchorBenefit getAnchorBenefit(Operation *op);
-
-  // Returns the type of fusion that can be done for an input op that feeds
-  // into a given anchor op.
-  FusionType fuseInput(Operation *anchorOp, Operation *inputOp);
-
-  // Returns the type of fusion that can be done for an output op that
-  // follows an anchor op.
-  FusionType fuseOutput(Operation *anchorOp, Operation *outputOp);
-
- private:
-  Dispatchability &dispatchability;
-};
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp
deleted file mode 100644
index cfc09b9..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchabilityAnalysis.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-class DispatchabilityAnalysisPass
-    : public DispatchabilityAnalysisBase<DispatchabilityAnalysisPass> {
- public:
-  DispatchabilityAnalysisPass() = default;
-
-  void runOnOperation() override {
-    // Force creation (or caching) of dispatchability information.
-    auto &dispatchability = getAnalysis<Dispatchability>();
-    markAllAnalysesPreserved();
-
-    // Build the dispatchable func table.
-    if (dispatchableFuncOps_) {
-      dispatchability.walkDispatchableOps([&](FuncOp funcOp) {
-        (*dispatchableFuncOps_)[funcOp.getName()] = funcOp;
-      });
-    }
-  }
-
-  std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps_;
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityAnalysisPass() {
-  return std::make_unique<DispatchabilityAnalysisPass>();
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp b/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
index a0c9cdd..5022da5 100644
--- a/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
@@ -27,12 +27,10 @@
 namespace IREE {
 namespace Flow {
 
-// Exports two kind of benchmark functions:
-//   - Creates exported functions to invoke each executable op.
-//   - Clones each exported functions (including those just created) with
-//     placeholder constant inputs instead of arguments and removes the
-//     exported attribute from the old functions.
-// The input are provided using flow.variable and flow.lookup.
+// Clones each exported function (including those just created) with
+// placeholder constant inputs instead of arguments and removes the exported
+// attribute from the old functions.
+// The inputs are provided using flow.variables.
 class ExportBenchmarkFuncsPass
     : public ExportBenchmarkFuncsBase<ExportBenchmarkFuncsPass> {
  public:
@@ -51,11 +49,6 @@
     for (auto entryFuncOp : entryFuncOps) {
       createEntryPointBenchmarkFunc(moduleOp, entryFuncOp);
     }
-
-    // Create one benchmark function per entry point in each flow.executable.
-    for (auto executableOp : moduleOp.getOps<IREE::Flow::ExecutableOp>()) {
-      createExecutableBenchmarkFunc(moduleOp, executableOp);
-    }
   }
 
  private:
@@ -122,63 +115,6 @@
     entryFuncOp.setPrivate();
   }
 
-  void createExecutableBenchmarkFunc(ModuleOp moduleOp,
-                                     IREE::Flow::ExecutableOp executableOp) {
-    OpBuilder moduleBuilder(&getContext());
-    moduleBuilder.setInsertionPointAfter(executableOp);
-    for (auto& op : executableOp.getBlock()) {
-      auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op);
-      if (!dispatchEntryOp) continue;
-      auto execFuncOp = executableOp.getInnerModule().lookupSymbol<FuncOp>(
-          dispatchEntryOp.function_ref());
-      Location loc = execFuncOp.getLoc();
-
-      // Create one dummy input variable per input.
-      SmallVector<IREE::Flow::VariableOp, 4> dummyInputVariableOps;
-      for (auto inputType : execFuncOp.getType().getInputs()) {
-        dummyInputVariableOps.push_back(
-            createDummyInputVariableOp(loc, inputType, moduleBuilder));
-      }
-
-      // Create a `() -> ()` entry point op the benchmark tool can run.
-      std::string funcName = std::string(execFuncOp.getName()) + "_benchmark";
-      auto funcType = moduleBuilder.getFunctionType({}, {});
-      auto funcOp = moduleBuilder.create<FuncOp>(loc, funcName, funcType);
-      funcOp->setAttr("iree.module.export", moduleBuilder.getUnitAttr());
-      funcOp->setAttr("iree.abi.stub", moduleBuilder.getUnitAttr());
-      SmallVector<NamedAttribute> reflectionAttrs = {
-          moduleBuilder.getNamedAttr("benchmark",
-                                     moduleBuilder.getStringAttr("dispatch")),
-      };
-      funcOp->setAttr("iree.reflection",
-                      moduleBuilder.getDictionaryAttr(reflectionAttrs));
-      Block* block = funcOp.addEntryBlock();
-
-      // Build the body of the FuncOp.
-      auto blockBuilder = OpBuilder(block, block->begin());
-      SmallVector<Value, 4> args;
-      for (auto variableOp : dummyInputVariableOps) {
-        args.push_back(blockBuilder.createOrFold<IREE::Flow::VariableLoadOp>(
-            loc, variableOp));
-      }
-
-      // TODO(hanchung): Use a real workload instead? We can probably
-      // calculate the workload from the results.
-      auto dummyWorkload = blockBuilder.create<ConstantIndexOp>(loc, 0);
-      auto dispatchOp = blockBuilder.create<DispatchOp>(
-          loc, dispatchEntryOp, ValueRange{dummyWorkload},
-          execFuncOp.getType().getResults(), ValueRange{}, args, ValueRange{},
-          ArrayRef<int64_t>{});
-
-      // Sink all results with do_not_optimize to ensure that DCE does not
-      // remove the dispatch.
-      for (auto result : dispatchOp.getResults()) {
-        blockBuilder.create<IREE::DoNotOptimizeOp>(loc, result);
-      }
-      blockBuilder.create<mlir::ReturnOp>(loc);
-    }
-  }
-
   int uniqueId = 0;
 };
 
diff --git a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
deleted file mode 100644
index 08ed480..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
+++ /dev/null
@@ -1,416 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/LogicalResult.h"
-#include "mlir/Transforms/Utils.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-// Replaces |returnOp| with a clone including |newOperands| appended.
-LogicalResult appendReturnOperands(IREE::Flow::ReturnOp returnOp,
-                                   ArrayRef<Value> newOperands) {
-  // Insert prior to the original return.
-  OpBuilder builder(returnOp);
-
-  // Clone with new args.
-  SmallVector<Value, 8> operands;
-  operands.reserve(returnOp.getNumOperands() + newOperands.size());
-  operands.append(returnOp.operand_begin(), returnOp.operand_end());
-  operands.append(newOperands.begin(), newOperands.end());
-  builder.create<IREE::Flow::ReturnOp>(returnOp.getLoc(), operands);
-
-  // Remove original.
-  returnOp.erase();
-
-  return success();
-}
-
-// Replaces |regionOp| with a clone including |newArgs| and |newResults|.
-DispatchRegionOp appendRegionArgsAndResults(DispatchRegionOp &regionOp,
-                                            ArrayRef<Value> newArgs,
-                                            ArrayRef<Value> newResults,
-                                            Location otherLoc) {
-  // Insert prior to the original region.
-  OpBuilder builder(regionOp);
-
-  // Location is original region + new region location (both probably fused).
-  SmallVector<Location, 2> fusedLocs = {regionOp.getLoc(), otherLoc};
-  auto fusedLoc = FusedLoc::get(regionOp.getContext(), fusedLocs);
-
-  // Clone with new results.
-  SmallVector<Value, 8> operands;
-  operands.append(regionOp.args().begin(), regionOp.args().end());
-  operands.append(newArgs.begin(), newArgs.end());
-  SmallVector<Type, 8> resultTypes;
-  resultTypes.append(regionOp.result_type_begin(), regionOp.result_type_end());
-  for (auto newResult : newResults) {
-    resultTypes.push_back(newResult.getType());
-  }
-  auto newRegionOp = builder.create<DispatchRegionOp>(
-      fusedLoc, resultTypes, regionOp.workload(), operands,
-      regionOp->getAttrs());
-  newRegionOp.body().takeBody(regionOp.body());
-
-  // Replace uses of original values with the new values.
-  for (int i = 0; i < regionOp.getNumResults(); ++i) {
-    regionOp.getResult(i).replaceAllUsesWith(newRegionOp.getResult(i));
-  }
-
-  // Erase the original region.
-  regionOp.erase();
-
-  return newRegionOp;
-}
-
-// Removes results that are not used from the dispatch region.
-// Returns the new operation. There may be unused ops in the region but DCE
-// should take care of that later.
-DispatchRegionOp removeUnusedResults(DispatchRegionOp regionOp) {
-  // Find return value within the region.
-  auto &regionBlock = regionOp.body().getBlocks().front();
-  auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(regionBlock.getTerminator());
-  if (!returnOp) {
-    regionBlock.getParent()->getParentOfType<FuncOp>().emitError()
-        << "block does not contain an flow.return op";
-  }
-
-  // Calculate new return values.
-  SmallVector<Type, 8> newReturnTypes;
-  SmallVector<Value, 8> newReturnValues;
-  SmallVector<Value, 8> newRegionResults;
-  for (int i = 0; i < returnOp.getNumOperands(); ++i) {
-    auto resultValue = regionOp.getResult(i);
-    if (!resultValue.use_empty()) {
-      // Still has uses so we will preserve it.
-      newReturnTypes.push_back(resultValue.getType());
-      newReturnValues.push_back(returnOp.getOperand(i));
-      newRegionResults.push_back(resultValue);
-    }
-  }
-
-  // Update return op operands. We can do this in-place as we are only shrinking
-  // the list.
-  returnOp.getOperation()->setOperands(newReturnValues);
-
-  // Insert prior to the original region.
-  OpBuilder builder(regionOp);
-
-  // Clone with new results.
-  auto newRegionOp = builder.create<DispatchRegionOp>(
-      regionOp.getLoc(), newReturnTypes, regionOp.workload(), regionOp.args(),
-      regionOp->getAttrs());
-  newRegionOp.body().takeBody(regionOp.body());
-
-  // Replace uses of original values with the new values.
-  for (int i = 0; i < newRegionResults.size(); ++i) {
-    newRegionResults[i].replaceAllUsesWith(newRegionOp.getResult(i));
-  }
-
-  // Erase the original region.
-  regionOp.erase();
-
-  return newRegionOp;
-}
-
-// Returns true if |lhs| and |rhs| have either an identical workload or one that
-// is compatible.
-bool areDispatchRegionWorkloadsCompatible(DispatchRegionOp &lhs,
-                                          DispatchRegionOp &rhs) {
-  // TODO(benvanik): more sophisticated checking; right now it's just identical.
-  return lhs.workload() == rhs.workload();
-}
-
-// Returns true if |value| depends in any way on |op| through any path.
-bool doesValueDependOnOperation(Value value, Operation *op) {
-  if (!value.getDefiningOp()) {
-    return false;
-  } else if (value.getDefiningOp() == op) {
-    return true;
-  } else if (value.getDefiningOp()->getBlock() == op->getBlock() &&
-             value.getDefiningOp()->isBeforeInBlock(op)) {
-    // Can't depend on |op| as it is defined prior to it.
-    return false;
-  } else if (value.getDefiningOp()->getBlock() == op->getBlock() &&
-             !value.getDefiningOp()->isBeforeInBlock(op)) {
-    // |op| is defined before one of |value| operands.
-    return true;
-  }
-  for (auto operand : value.getDefiningOp()->getOperands()) {
-    if (doesValueDependOnOperation(operand, op)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// Returns true if |rhs| transitively depends on any output of |lhs|.
-// |rhs| may depend directly on the results of |lhs| but no other ops in the
-// parent block will use the results prior to |rhs|.
-bool areDispatchRegionsTransitivelyDependent(DispatchRegionOp &lhs,
-                                             DispatchRegionOp &rhs) {
-  for (auto arg : rhs.args()) {
-    if (arg.getDefiningOp() != lhs && doesValueDependOnOperation(arg, lhs)) {
-      // Transitively dependent - boo - can't merge yet.
-      return true;
-    }
-  }
-  return false;
-}
-
-// Returns true if the dispatch region contains only a single block.
-// This is because our merge isn't very smart and will not preserve the CFG
-// right now. We can fix this when needed.
-bool isDispatchRegionMergable(DispatchRegionOp &regionOp) {
-  // Disallow merging of dispatch regions containing matmuls and other big ops.
-  // We do this to allow backends to lower the big op as entirely isolated such
-  // that substituting library calls is easier.
-  for (auto &block : regionOp.body().getBlocks()) {
-    for (auto &op : block) {
-      // A leaf only op is mergable.
-      if ((OpDispatchPolicy::isUnsupportedFusionOp(&op) ||
-           OpDispatchPolicy::isFusableWithConsumersOnly(&op)) &&
-          !OpDispatchPolicy::isLeafOnlyOp(&op)) {
-        return false;
-      }
-    }
-  }
-  return regionOp.body().getBlocks().size() == 1;
-}
-
-// Returns true if |rhs| has ops that can only be leaf ops and would lose that
-// characteristic if the two dispatch regions were merged.
-bool rhsHasLeafOnlyOp(DispatchRegionOp &lhs, DispatchRegionOp &rhs) {
-  auto &rhsBlock = rhs.body().front();
-  auto lhsArgs = llvm::to_vector<8>(lhs.args());
-  auto rhsArgs = llvm::to_vector<8>(rhs.args());
-  for (int rhsOpIdx = 0; rhsOpIdx < rhsArgs.size(); ++rhsOpIdx) {
-    for (int lhsResultIdx = 0; lhsResultIdx < lhs.getNumResults();
-         ++lhsResultIdx) {
-      if (rhsArgs[rhsOpIdx] != lhs.getResult(lhsResultIdx)) continue;
-      for (auto *user : rhsBlock.getArgument(rhsOpIdx).getUsers()) {
-        if (OpDispatchPolicy::isLeafOnlyOp(user)) return true;
-      }
-    }
-  }
-  return false;
-}
-
-// Merges |rhs| into |lhs| and returns the new |lhs| op.
-// Precondition: !areDispatchRegionsTransitivelyDependent
-DispatchRegionOp mergeDispatchRegions(DispatchRegionOp &lhs,
-                                      DispatchRegionOp &rhs) {
-  auto &lhsBlock = lhs.body().front();
-  auto &rhsBlock = rhs.body().front();
-
-  // Find the values used as return values in the lhs.
-  // We'll need to replace the uses in rhs with these.
-  auto lhsReturnOp = cast<IREE::Flow::ReturnOp>(lhsBlock.getTerminator());
-  SmallVector<Value, 8> lhsReturnValues;
-  lhsReturnValues.reserve(lhsReturnOp.getNumOperands());
-  lhsReturnValues.append(lhsReturnOp.operand_begin(),
-                         lhsReturnOp.operand_end());
-
-  // Find the values used as return values in the rhs.
-  // We'll add these to the results of the lhs region.
-  auto rhsReturnOp = cast<IREE::Flow::ReturnOp>(rhsBlock.getTerminator());
-  SmallVector<Value, 8> rhsReturnValues;
-  rhsReturnValues.reserve(rhsReturnOp.getNumOperands());
-  rhsReturnValues.append(rhsReturnOp.operand_begin(),
-                         rhsReturnOp.operand_end());
-
-  // Compute new args.
-  BlockAndValueMapping mapping;
-  SmallVector<Value, 8> newArgs;
-  auto lhsArgs = llvm::to_vector<8>(lhs.args());
-  auto rhsArgs = llvm::to_vector<8>(rhs.args());
-  for (int rhsOpIdx = 0; rhsOpIdx < rhsArgs.size(); ++rhsOpIdx) {
-    bool didElide = false;
-    // Find if the rhs arg already exists on the lhs and dedupe.
-    for (int lhsOpIdx = 0; lhsOpIdx < lhsArgs.size(); ++lhsOpIdx) {
-      if (rhsArgs[rhsOpIdx] == lhsArgs[lhsOpIdx]) {
-        mapping.map(rhsBlock.getArgument(rhsOpIdx),
-                    lhsBlock.getArgument(lhsOpIdx));
-        didElide = true;
-        break;
-      }
-    }
-    // Find if the arg has a direct dependency on the results of the lhs.
-    for (int lhsResultIdx = 0; lhsResultIdx < lhs.getNumResults();
-         ++lhsResultIdx) {
-      if (rhsArgs[rhsOpIdx] == lhs.getResult(lhsResultIdx)) {
-        // Direct dependency; can elide. We'll skip adding it to the new region
-        // args and instead just remap it later.
-        mapping.map(rhsBlock.getArgument(rhsOpIdx),
-                    lhsReturnValues[lhsResultIdx]);
-        didElide = true;
-        break;
-      }
-    }
-    if (!didElide) {
-      // Add to the lhs block.
-      auto oldArg = rhs.getOperand(rhsOpIdx + 1);
-      auto newArg = lhsBlock.addArgument(oldArg.getType());
-      mapping.map(rhsBlock.getArgument(rhsOpIdx), newArg);
-      newArgs.push_back(oldArg);
-    }
-  }
-
-  OpBuilder regionBuilder = OpBuilder::atBlockEnd(&lhsBlock);
-
-  // Copy ops (replacing any args as needed).
-  // Note that we need to insert prior to the terminator.
-  regionBuilder.setInsertionPoint(lhsReturnOp);
-  for (auto &op : rhsBlock) {
-    // Note that this updates the mapping with the new values (so at the end
-    // we have those new values).
-    //
-    // We avoid the return op here as we have already merged it above.
-    if (!op.hasTrait<OpTrait::IsTerminator>()) {
-      regionBuilder.clone(op, mapping);
-    }
-  }
-
-  // Compute new results and add to both region and return op.
-  SmallVector<Value, 8> newResults;
-  for (auto rhsResult : rhsReturnValues) {
-    newResults.push_back(mapping.lookupOrDefault(rhsResult));
-  }
-  if (failed(appendReturnOperands(lhsReturnOp, newResults))) {
-    return nullptr;
-  }
-  auto newRegionOp =
-      appendRegionArgsAndResults(lhs, newArgs, newResults, rhs.getLoc());
-
-  // Replace uses of original values with the new values.
-  for (int i = 0; i < rhs.getNumResults(); ++i) {
-    rhs.getResult(i).replaceAllUsesWith(
-        newRegionOp.getResult(lhsReturnValues.size() + i));
-  }
-
-  // Remove rhs region.
-  rhs.erase();
-
-  // Remove results from the lhs that aren't used anymore; they may have become
-  // dead during the merge if only the rhs was using them.
-  newRegionOp = removeUnusedResults(newRegionOp);
-
-  return newRegionOp;
-}
-
-// Merges multiple dispatch regions within a block into the same region,
-// if possible. Operations may be reordered if it's possible to merge more while
-// still obeying data dependencies.
-LogicalResult mergeBlockDispatchRegions(FuncOp func, Block *parentBlock) {
-  LLVM_DEBUG(llvm::dbgs() << "+++ MERGING BLOCK DISPATCH REGIONS:\n");
-  SmallVector<DispatchRegionOp, 8> mergableRegions;
-  for (auto &op : *parentBlock) {
-    if (auto regionOp = dyn_cast<DispatchRegionOp>(op)) {
-      if (isDispatchRegionMergable(regionOp)) {
-        LLVM_DEBUG(llvm::dbgs() << "   -REGION MERGABLE-\n");
-        mergableRegions.push_back(regionOp);
-      } else {
-        LLVM_DEBUG(llvm::dbgs() << "   -REGION NOT MERGABLE-\n");
-      }
-    }
-  }
-  for (int i = 0; i < mergableRegions.size(); ++i) {
-    if (!mergableRegions[i]) continue;
-    auto &lhs = mergableRegions[i];
-    for (int j = i + 1; j < mergableRegions.size(); ++j) {
-      if (!mergableRegions[j]) continue;
-      auto &rhs = mergableRegions[j];
-      if (!areDispatchRegionWorkloadsCompatible(lhs, rhs) ||
-          areDispatchRegionsTransitivelyDependent(lhs, rhs)) {
-        LLVM_DEBUG(llvm::dbgs() << "   -REGIONS INCOMPATIBLE-\n");
-        continue;
-      }
-      if (!isDispatchRegionMergable(rhs)) {
-        // TODO(b/134675461): support non-trivial control flow.
-        LLVM_DEBUG(llvm::dbgs()
-                   << "   -REGION CONTAINS NON-TRIVIAL CONTROL FLOW-\n");
-      }
-      if (rhsHasLeafOnlyOp(lhs, rhs)) {
-        LLVM_DEBUG(llvm::dbgs() << "   -RHS REGION HAS LEAF OP-\n");
-        continue;
-      }
-      mergableRegions[i] = mergeDispatchRegions(lhs, rhs);
-      if (!mergableRegions[i]) {
-        return failure();
-      }
-      mergableRegions[j] = nullptr;
-      --i;  // Try again to see if there are subsequent regions to merge.
-      LLVM_DEBUG(llvm::dbgs() << "   -> MERGED REGIONS\n");
-      break;
-    }
-  }
-
-  LLVM_DEBUG(llvm::dbgs() << "--- MERGED BLOCK DISPATCH REGIONS\n");
-  return success();
-}
-
-}  // namespace
-
-// Identifies dispatch regions that have compatible workloads and folds them.
-// This relies on CSE having deduped workloads to simplify the logic to simply
-// looking for dispatch regions using the same values.
-class FoldCompatibleDispatchRegionsPass
-    : public FoldCompatibleDispatchRegionsBase<
-          FoldCompatibleDispatchRegionsPass> {
- public:
-  void runOnOperation() override {
-    FuncOp func = getOperation();
-    for (auto &block : func) {
-      if (failed(mergeBlockDispatchRegions(func, &block))) {
-        return signalPassFailure();
-      }
-    }
-  }
-};
-
-std::unique_ptr<OperationPass<FuncOp>>
-createFoldCompatibleDispatchRegionsPass() {
-  return std::make_unique<FoldCompatibleDispatchRegionsPass>();
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp b/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp
deleted file mode 100644
index 033e3b9..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions2.cpp
+++ /dev/null
@@ -1,436 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <algorithm>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOpUtils.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/DispatchConfig.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/WorkloadUtils.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Support/Debug.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/RegionUtils.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-struct DispatchableOp {
-  OpDispatchPolicy::AnchorBenefit anchorBenefit;
-  size_t index;
-  Operation *op;
-
-  bool operator<(const DispatchableOp &other) const {
-    // Note inverted index: this is so that traversing a sorted list in
-    // reverse yields a topological ordering for each anchorBenefit.
-    return std::tie(anchorBenefit, other.index) <
-           std::tie(other.anchorBenefit, index);
-  }
-};
-
-struct DispatchRegion {
-  DispatchRegionOp op;
-  Operation *anchorOp;
-
-  Block &getEntryBlock() { return op.body().front(); }
-
-  static llvm::Optional<DispatchRegion> form(Operation *anchorOp) {
-    auto loc = anchorOp->getLoc();
-    if (anchorOp->getNumResults() < 1) {
-      emitError(loc) << "dispatch anchor op must have at least one result: "
-                     << *anchorOp;
-      return llvm::None;
-    }
-    Value result = anchorOp->getResult(0);
-    Value workload = calculateWorkload(anchorOp, result);
-    if (!workload) return llvm::None;
-
-    OpBuilder builder(anchorOp->getContext());
-    auto created =
-        DispatchRegionOp::formFromAnchorOp(workload, anchorOp, builder);
-    if (!created) return llvm::None;
-    return DispatchRegion{created->first, created->second};
-  }
-
-  // After a call to inlineDispatchOp, adds the results of the inlined op to
-  // the dispatch region's results and redirects any uses outside of the
-  // dispatch region.
-  void returnAndReplaceUses(Operation *origOp, Operation *inlinedOp) {
-    // Extend the arity of the dispatch region.
-    OpBuilder builder(op.getContext());
-    llvm::SmallVector<Value, 4> addlResults(inlinedOp->getResults());
-    origOp->replaceAllUsesWith(
-        DispatchRegionOp::appendResults(op, addlResults, builder));
-  }
-};
-
-// Clones and hoists any identity metadata ops from the operands and results
-// of the dispatch region back out into the surrounding block.
-// This function is not general purpose: it only knows how to undo sinking
-// done by dispatch region formation.
-void hoistDispatchRegionMetadataOps(DispatchRegion &dr,
-                                    OpDispatchPolicy &policy) {
-  BlockAndValueMapping mapping;
-  Block &block = dr.getEntryBlock();
-  for (unsigned i = 0, e = block.getNumArguments(); i < e; ++i) {
-    mapping.map(block.getArgument(i), dr.op.args()[i]);
-  }
-
-  // Hoist metadata ops from the operand edge.
-  for (auto it : llvm::enumerate(block.getArguments())) {
-    auto &blockArg = it.value();
-    for (auto &blockUse : blockArg.getUses()) {
-      Operation *useOp = blockUse.getOwner();
-      if (!policy.isIdentityMetadata(useOp) || useOp->getOperand(0) != blockArg)
-        continue;
-      OpBuilder builder(dr.op);
-      Operation *newOp = builder.clone(*useOp, mapping);
-      dr.op.argsMutable().slice(it.index(), 1).assign(newOp->getResult(0));
-    }
-  }
-
-  // Hoist metadata ops from the result edge.
-  // Since initial formation can only have a single block, this is safe.
-  auto *terminator = block.getTerminator();
-  for (auto it : llvm::enumerate(terminator->getOperands())) {
-    Operation *defOp = it.value().getDefiningOp();
-    if (!defOp || !policy.isIdentityMetadata(defOp)) continue;
-    OpBuilder builder(dr.op.getContext());
-    builder.setInsertionPointAfter(dr.op);
-    Operation *newOp = builder.clone(*defOp, mapping);
-    dr.op.getResult(it.index()).replaceAllUsesWith(newOp->getResult(0));
-    newOp->setOperand(0, dr.op.getResult(it.index()));
-  }
-}
-
-void findDispatchableAnchorOps(Block &block, OpDispatchPolicy &policy,
-                               OpDispatchPolicy::AnchorBenefit maxAnchorBenefit,
-                               llvm::SmallVectorImpl<DispatchableOp> &ops) {
-  for (auto it : llvm::enumerate(block.getOperations())) {
-    Operation *op = &it.value();
-    // Skip any already formed dispatch regions and non dispatchable ops.
-    if (isa<DispatchRegionOp>(op)) continue;
-    if (!policy.isDispatchable(op)) continue;
-    OpDispatchPolicy::AnchorBenefit anchorBenefit = policy.getAnchorBenefit(op);
-    if (anchorBenefit > maxAnchorBenefit || anchorBenefit <= 0) continue;
-    ops.push_back({anchorBenefit, it.index(), op});
-  }
-}
-
-// Maintains a worklist of operations that are potential fusion candidates.
-// By default, items are popped in inverse topological order. An operation
-// can only be added to a worklist once and later additions will be ignored.
-class FusionWorklist {
- public:
-  FusionWorklist(Block *block, bool inverseTopological = true)
-      : block(block), inverseTopological(inverseTopological) {}
-
-  // Adds defining ops of operands to the worklist.
-  void addOperandDefs(OperandRange operands) {
-    for (Value operand : operands) {
-      Operation *def = operand.getDefiningOp();
-      if (!def) continue;
-      if (def->getBlock() != block) continue;
-      if (!isValidItem(def)) continue;
-      if (!visited.insert(def).second) continue;
-      worklist.push_back(def);
-      dirty = true;
-    }
-  }
-
-  // Adds uses.
-  void addResultUses(ResultRange results) {
-    for (auto result : results) {
-      for (auto &use : result.getUses()) {
-        Operation *def = use.getOwner();
-        if (def->hasTrait<OpTrait::IsTerminator>()) continue;
-        if (def->getBlock() != block) continue;
-        if (!isValidItem(def)) continue;
-        if (!visited.insert(def).second) continue;
-        worklist.push_back(def);
-        dirty = true;
-      }
-    }
-  }
-
-  // Pops the next operation or nullptr if empty.
-  Operation *popNext() {
-    if (worklist.empty()) return nullptr;
-    if (dirty) sort();
-    return worklist.pop_back_val();
-  }
-
- private:
-  bool isValidItem(Operation *op) {
-    // Dispatch regions cannot be added to the worklist because they are
-    // modified/deleted in place and can not be guaranteed valid for the
-    // duration of the worklist.
-    return !llvm::isa<DispatchRegionOp>(op);
-  }
-
-  // Sorts worklist items such that popNext() values pop in inverse
-  // topological order.
-  void sort() {
-    if (inverseTopological) {
-      llvm::sort(worklist, [](Operation *left, Operation *right) {
-        return left->isBeforeInBlock(right);
-      });
-    } else {
-      llvm::sort(worklist, [](Operation *left, Operation *right) {
-        return right->isBeforeInBlock(left);
-      });
-    }
-  }
-
-  Block *block;
-  llvm::SmallVector<Operation *, 4> worklist;
-  llvm::SmallDenseSet<Operation *, 4> visited;
-  bool inverseTopological;
-  bool dirty = false;
-};
-
-LogicalResult fuseInputs(DispatchRegion &dispatchRegion,
-                         OpDispatchPolicy &policy) {
-  LLVM_DEBUG(llvm::dbgs() << "++ FUSING INPUTS\n");
-
-  FusionWorklist worklist(dispatchRegion.op.getOperation()->getBlock());
-  worklist.addOperandDefs(dispatchRegion.op.getOperands());
-
-  while (Operation *nextOp = worklist.popNext()) {
-    if (!policy.isDispatchable(nextOp)) continue;
-    auto action = policy.fuseInput(dispatchRegion.anchorOp, nextOp);
-    LLVM_DEBUG(llvm::dbgs().indent(2));
-    if (action == OpDispatchPolicy::FusionType::MOVE_INTO) {
-      return nextOp->emitError() << "cannot fuse input with MOVE_INTO action";
-    } else if (action == OpDispatchPolicy::FusionType::DISABLED) {
-      LLVM_DEBUG(llvm::dbgs()
-                 << "- SKIP NON FUSABLE INPUT: " << *nextOp << "\n");
-      continue;
-    }
-
-    // Always inline inputs at the top of the block. Since we are processing
-    // the worklist in inverse topological order, this preserves the original
-    // ordering.
-    LLVM_DEBUG(llvm::dbgs() << "- FUSABLE INPUT(" << static_cast<int>(action)
-                            << "): " << *nextOp << "\n");
-    OpBuilder builder(nextOp->getContext());
-    auto *inlinedOp = dispatchRegion.op.inlineOp(nextOp, builder);
-    if (!inlinedOp) {
-      return failure();
-    }
-    worklist.addOperandDefs(nextOp->getOperands());
-
-    // Erase the op if it has no uses. This keeps it from forming regions
-    // that will be dce'd later (or getting in the way of the benefit
-    // scheme). Note that dispatchable ops have no side effects, which
-    // makes this simple check safe.
-    // The dispatch region must be optimized to remove unused arguments
-    // resulting from this fusion.
-    if (nextOp->use_empty()) {
-      nextOp->erase();
-    }
-  }
-
-  return success();
-}
-
-// Inlining an op into a dispatch region makes the operands of the op the
-// operands of the dispatch region (if the operands aren't already defined in
-// the dispatch region). The dispatch region has to be moved just after the
-// last defined operand for the SSA value uses to remain valid.
-static LogicalResult moveDispatchOp(DispatchRegionOp dispatchRegionOp,
-                                    Operation *inlinedOp) {
-  // Find the operation that is last in the block among those that produce an
-  // operand of the inlinedOp.
-  Optional<Operation *> lastOperandDef = llvm::None;
-  for (Value operand : inlinedOp->getOperands()) {
-    if (Operation *definingOp = operand.getDefiningOp()) {
-      if (!lastOperandDef ||
-          lastOperandDef.getValue()->isBeforeInBlock(definingOp)) {
-        lastOperandDef = definingOp;
-      }
-    }
-  }
-  // Check for values that are used in the region of the op but captured from
-  // outside the region.
-  llvm::SetVector<Value> capturedValues;
-  getUsedValuesDefinedAbove(inlinedOp->getRegions(), capturedValues);
-  for (Value capturedValue : capturedValues) {
-    if (Operation *definingOp = capturedValue.getDefiningOp()) {
-      if (!lastOperandDef ||
-          lastOperandDef.getValue()->isBeforeInBlock(definingOp)) {
-        lastOperandDef = definingOp;
-      }
-    }
-  }
-  // If the last operand def is already before the dispatch region, there is
-  // nothing to do.
-  if (!lastOperandDef ||
-      lastOperandDef.getValue()->isBeforeInBlock(dispatchRegionOp)) {
-    return success();
-  }
-
-  // The dispatch region needs to be moved after the lastOperandDef, but before
-  // the first use.
-  Optional<Operation *> firstUse = llvm::None;
-  for (Operation *user : dispatchRegionOp.getOperation()->getUsers()) {
-    if (!firstUse || user->isBeforeInBlock(*firstUse)) {
-      firstUse = user;
-    }
-  }
-  if (firstUse && firstUse.getValue()->isBeforeInBlock(*lastOperandDef))
-    return failure();
-  dispatchRegionOp.getOperation()->moveAfter(lastOperandDef.getValue());
-  return success();
-}
-
-LogicalResult fuseOutputs(DispatchRegion &dispatchRegion,
-                          OpDispatchPolicy &policy) {
-  LLVM_DEBUG(llvm::dbgs() << "++ FUSING OUTPUT\n");
-
-  FusionWorklist worklist(dispatchRegion.op.getOperation()->getBlock(),
-                          /*inverseTopological=*/false);
-  worklist.addResultUses(dispatchRegion.op.getResults());
-
-  while (Operation *nextOp = worklist.popNext()) {
-    if (!policy.isDispatchable(nextOp)) continue;
-    auto action = policy.fuseOutput(dispatchRegion.anchorOp, nextOp);
-    LLVM_DEBUG(llvm::dbgs().indent(2));
-    if (action == OpDispatchPolicy::FusionType::DISABLED) {
-      LLVM_DEBUG(llvm::dbgs()
-                 << "- SKIP NON FUSABLE INPUT: " << *nextOp << "\n");
-      continue;
-    }
-    if (action != OpDispatchPolicy::FusionType::MOVE_INTO) {
-      return nextOp->emitError()
-             << "cannot fuse output except with MOVE_INTO action";
-    }
-    if (failed(moveDispatchOp(dispatchRegion.op, nextOp))) {
-      LLVM_DEBUG(llvm::dbgs() << "- SKIP Fusion due to SSA use-def violation "
-                              << *nextOp << "\n");
-      continue;
-    }
-    LLVM_DEBUG(llvm::dbgs() << "- FUSABLE OUTPUT(" << static_cast<int>(action)
-                            << "): " << *nextOp << "\n");
-    // Since results will be redirected to the region results, need to scan
-    // for worklist items before changing use-def chain.
-    worklist.addResultUses(nextOp->getResults());
-    OpBuilder builder(nextOp->getContext());
-    auto *inlinedOp =
-        dispatchRegion.op.inlineOp(nextOp, builder, /*positionAtEnd=*/true);
-    if (!inlinedOp) {
-      return failure();
-    }
-    dispatchRegion.returnAndReplaceUses(nextOp, inlinedOp);
-    if (nextOp->use_empty()) {
-      nextOp->erase();
-    }
-  }
-
-  return success();
-}
-
-LogicalResult processBlock(Block &block, OpDispatchPolicy &policy) {
-  int maxAnchorBenefit =
-      std::numeric_limits<OpDispatchPolicy::AnchorBenefit>::max();
-  // Maps DispatchRegionOp to the anchor op.
-  llvm::DenseMap<Operation *, Operation *> dispatchRegions;
-  // Per iteration scratch.
-  llvm::SmallVector<DispatchableOp, 10> dispatchableOps;
-
-  // Loop backwards from high anchor benefit to low.
-  for (;;) {
-    dispatchableOps.clear();
-    // Enumerate un-dispatched ops.
-    findDispatchableAnchorOps(block, policy, maxAnchorBenefit, dispatchableOps);
-    if (dispatchableOps.empty()) break;
-    llvm::sort(dispatchableOps);
-
-    // Traversing from back->front will produce ops in [anchorBenefit, index]
-    // order.
-    auto &d = dispatchableOps.back();
-    if (d.anchorBenefit <= 0) break;
-    LLVM_DEBUG(llvm::dbgs() << "FORM DISPATCH REGION(" << d.index << ":"
-                            << d.anchorBenefit << "): " << *d.op << "\n");
-    auto dispatchRegion = DispatchRegion::form(d.op);
-    if (!dispatchRegion) return failure();
-    dispatchRegions.insert(
-        std::make_pair(dispatchRegion->op, dispatchRegion->anchorOp));
-
-    // Fuse outputs prior to inputs, since they can yield more things to
-    // evaluate for input fusion.
-    if (failed(fuseOutputs(*dispatchRegion, policy))) return failure();
-    if (failed(fuseInputs(*dispatchRegion, policy))) return failure();
-
-    // Ensure all unused operands and results are dce'd.
-    // Note that this may delete the op itself if it is unused.
-    optimizeClosureOp(dispatchRegion->op);
-    if (dispatchRegion->op) {
-      hoistDispatchRegionMetadataOps(*dispatchRegion, policy);
-    }
-  }
-  return success();
-}
-
-// Identifies dispatchable ops and moves them into dispatch regions.
-// Some ops, such as call, will be deferred until following passes.
-class IdentifyDispatchRegions2Pass
-    : public IdentifyDispatchRegions2Base<IdentifyDispatchRegions2Pass> {
- public:
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Flow::FlowDialect>();
-  }
-
-  void runOnOperation() override {
-    // NOTE: we require the DispatchabilityAnalysisPass to have run first.
-    auto dispatchability = getCachedParentAnalysis<Dispatchability>();
-    FuncOp func = getOperation();
-    if (!dispatchability.hasValue()) {
-      func.emitError()
-          << "dispatchability analysis not performed "
-             "on module; run -iree-flow-dispatchability-analysis first";
-      return signalPassFailure();
-    }
-
-    OpDispatchPolicy policy(*dispatchability);
-    for (auto &block : getOperation()) {
-      if (failed(processBlock(block, policy))) {
-        return signalPassFailure();
-      }
-    }
-  }
-};
-
-}  // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> createIdentifyDispatchRegions2Pass() {
-  return std::make_unique<IdentifyDispatchRegions2Pass>();
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
deleted file mode 100644
index 54dffeb..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Shape/IR/Builders.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeTypes.h"
-#include "iree/compiler/Dialect/Shape/Utils/TypeConversion.h"
-#include "llvm/Support/Debug.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/Pass/Pass.h"
-
-#define DEBUG_TYPE "iree-dispatch"
-
-namespace mlir {
-namespace iree_compiler {
-
-using Shape::getShapeToPrimitiveTypeExpander;
-
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-static llvm::cl::opt<bool> traceDispatchTensors(
-    "iree-flow-trace-dispatch-tensors",
-    llvm::cl::desc("Trace input/output values for each dispatch function"),
-    llvm::cl::init(false));
-
-// Converts a dispatch_region into a dispatch to the outlined region function.
-LogicalResult convertToDispatchOp(DispatchRegionOp regionOp,
-                                  ExecutableOp executableOp,
-                                  DispatchEntryOp entryPointOp,
-                                  FuncOp outlinedFuncOp) {
-  // Insert at the same place as the original region.
-  OpBuilder builder(regionOp);
-
-  // Perform shape to primitive type expansion.
-  auto &typeExpander = getShapeToPrimitiveTypeExpander();
-  SmallVector<Value, 4> origArgs(regionOp.args());
-  SmallVector<Value, 4> newArgs;
-  if (failed(typeExpander.expandSourceValuesToTarget(
-          regionOp.getLoc(), origArgs, newArgs, builder))) {
-    return failure();
-  }
-
-  auto getTensorTypeArgs = [](auto args) {
-    SmallVector<Value, 4> res;
-    for (auto arg : args) {
-      if (arg.getType().template isa<TensorType>()) res.push_back(arg);
-    }
-    return res;
-  };
-  if (traceDispatchTensors) {
-    std::string str = "Input for " + std::string(outlinedFuncOp.getName());
-    builder.create<TensorTraceOp>(regionOp.getLoc(), builder.getStringAttr(str),
-                                  getTensorTypeArgs(newArgs));
-  }
-
-  SmallVector<Value, 4> operandDynamicDims;
-  for (auto operand : regionOp.args()) {
-    if (operand.getType().isa<ShapedType>()) {
-      operandDynamicDims.append(Shape::buildOrFindDynamicDimsForValue(
-          regionOp.getLoc(), operand, builder));
-    }
-  }
-  SmallVector<Value, 4> resultDynamicDims;
-  for (auto result : regionOp.results()) {
-    if (result.getType().isa<ShapedType>()) {
-      resultDynamicDims.append(Shape::buildOrFindDynamicDimsForValue(
-          regionOp.getLoc(), result, builder));
-    }
-  }
-
-  // Create the dispatch op to the executable function.
-  auto dispatchOp = builder.create<DispatchOp>(
-      regionOp.getLoc(), entryPointOp, ValueRange{regionOp.workload()},
-      outlinedFuncOp.getType().getResults(), resultDynamicDims, newArgs,
-      operandDynamicDims, ArrayRef<int64_t>{});
-
-  if (traceDispatchTensors) {
-    std::string str = "Output for " + std::string(outlinedFuncOp.getName());
-    builder.create<TensorTraceOp>(regionOp.getLoc(), builder.getStringAttr(str),
-                                  getTensorTypeArgs(dispatchOp.getResults()));
-  }
-
-  // Replace uses of the existing results with the new results.
-  for (int i = 0; i < regionOp.getNumResults(); ++i) {
-    regionOp.getResult(i).replaceAllUsesWith(dispatchOp.getResult(i));
-  }
-
-  // Erase original region.
-  regionOp.erase();
-
-  return success();
-}
-
-// Converts a region body to a function.
-// The region entry block args and return terminators are used to derive the
-// function type.
-FuncOp createRegionFunction(Location loc, StringRef functionName,
-                            Region &region) {
-  // Build function type matching 1:1 with the region signature.
-  SmallVector<Type, 4> operandTypes;
-  SmallVector<Type, 4> resultTypes;
-  auto &entryBlock = region.front();
-  for (auto &operand : entryBlock.getArguments()) {
-    operandTypes.push_back(operand.getType());
-  }
-  for (auto &block : region.getBlocks()) {
-    if (auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(block.back())) {
-      resultTypes = llvm::to_vector<4>(returnOp.getOperandTypes());
-      break;
-    }
-  }
-
-  // Clone region into the function body.
-  auto functionType =
-      FunctionType::get(region.getContext(), operandTypes, resultTypes);
-  auto funcOp = FuncOp::create(loc, functionName, functionType);
-  BlockAndValueMapping mapping;
-  region.cloneInto(&funcOp.getBody(), mapping);
-
-  // Replace flow.return with std.return.
-  for (auto &block : funcOp.getBlocks()) {
-    if (auto returnOp = dyn_cast<IREE::Flow::ReturnOp>(block.back())) {
-      OpBuilder builder(returnOp);
-      builder.create<mlir::ReturnOp>(
-          returnOp.getLoc(), llvm::to_vector<4>(returnOp.getOperands()));
-      returnOp.erase();
-    }
-  }
-
-  // Remove any tie_shapes not from entry block args.
-  // TODO(laurenzo): Remove this once we are not materializing ties in
-  // dispatch regions at all. For now, this at least provides a better
-  // contract to the backends without leaking the fact that dispatch
-  // formation fully materializes ties.
-  auto *newEntryBlock = &funcOp.getBlocks().front();
-  funcOp.walk([&](Shape::TieShapeOp tieOp) {
-    if (auto blockArg = tieOp.operand().dyn_cast<BlockArgument>()) {
-      if (blockArg.getOwner() == newEntryBlock) return;
-    }
-    // Elide.
-    tieOp.result().replaceAllUsesWith(tieOp.operand());
-    tieOp.erase();
-  });
-
-  // Expand shape types to primitives.
-  auto &typeExpander = getShapeToPrimitiveTypeExpander();
-  OpBuilder expandBuilder(funcOp.getContext());
-  if (failed(typeExpander.expandFunctionSignature(funcOp, expandBuilder)) ||
-      failed(typeExpander.expandAllReturnLikeTerminators<mlir::ReturnOp>(
-          funcOp, expandBuilder))) {
-    return nullptr;
-  }
-
-  return funcOp;
-}
-
-// Outlines a dispatch region into a flow.executable.
-LogicalResult outlineDispatchRegion(
-    DispatchRegionOp regionOp, int outlinedRegionOrdinal,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  // Create the dispatch function.
-  auto parentFuncOp = regionOp->getParentOfType<FuncOp>();
-  std::string namePrefix = parentFuncOp.getName().str() + "_ex_dispatch_" +
-                           std::to_string(outlinedRegionOrdinal);
-
-  // Convert the region to a function.
-  auto dispatchFuncOp =
-      createRegionFunction(regionOp.getLoc(), namePrefix, regionOp.body());
-  if (!dispatchFuncOp) {
-    return failure();
-  }
-
-  // Create the executable with the region cloned into it.
-  auto executableOp = createExecutable(
-      regionOp.getLoc(), namePrefix, {dispatchFuncOp},
-      parentFuncOp->getParentOfType<ModuleOp>(), dispatchableFuncOps);
-  executableOp.getOperation()->moveBefore(parentFuncOp);
-  executableOp.setPrivate();
-
-  // Add dispatch export pointing at the function.
-  OpBuilder builder(executableOp.body());
-  auto entryPointOp = builder.create<DispatchEntryOp>(
-      regionOp.getLoc(), builder.getStringAttr(dispatchFuncOp.getName()),
-      builder.getSymbolRefAttr(dispatchFuncOp),
-      TypeAttr::get(dispatchFuncOp.getType()),
-      /*workgroup_rank=*/IntegerAttr{});
-
-  // Finally convert the dispatch region into a dispatch to the outlined func.
-  return convertToDispatchOp(regionOp, executableOp, entryPointOp,
-                             dispatchFuncOp);
-}
-
-}  // namespace
-
-class OutlineDispatchRegionsPass
-    : public OutlineDispatchRegionsBase<OutlineDispatchRegionsPass> {
- public:
-  OutlineDispatchRegionsPass() = default;
-
-  void runOnOperation() override {
-    auto dispatchability = getCachedAnalysis<Dispatchability>();
-    llvm::StringMap<FuncOp> dispatchableFuncOps;
-    if (dispatchability.hasValue()) {
-      // If we do not get dispatchability from the cache, keep
-      // dispatchableFuncOps empty to remain compatible with previous behavior.
-      dispatchability.getValue().get().walkDispatchableOps([&](FuncOp funcOp) {
-        dispatchableFuncOps[funcOp.getName()] = funcOp;
-      });
-    }
-
-    // TODO(benvanik): replace with a pattern rewriter?
-    auto funcOps = llvm::to_vector<32>(getOperation().getOps<FuncOp>());
-    for (auto funcOp : funcOps) {
-      // Outline all of the dispatch regions ops in this function.
-      SmallVector<DispatchRegionOp, 8> dispatchRegionOps;
-      funcOp.walk(
-          [&](DispatchRegionOp op) { dispatchRegionOps.push_back(op); });
-      for (int i = 0; i < dispatchRegionOps.size(); ++i) {
-        if (failed(outlineDispatchRegion(dispatchRegionOps[i], i,
-                                         dispatchableFuncOps))) {
-          return signalPassFailure();
-        }
-      }
-    }
-  }
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegionsPass() {
-  return std::make_unique<OutlineDispatchRegionsPass>();
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
index 779006f..e19401c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions2.cpp
@@ -14,11 +14,9 @@
 
 #include <utility>
 
-#include "iree/compiler/Dialect/Flow/Analysis/Dispatchability.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/Flow/Transforms/PassDetail.h"
 #include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
 #include "iree/compiler/Dialect/Shape/IR/Builders.h"
 #include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
 #include "iree/compiler/Dialect/Shape/IR/ShapeTypes.h"
@@ -39,6 +37,38 @@
 namespace Flow {
 namespace {
 
+// Creates a flow.executable out of a set of functions, pulling in all other
+// functions reachable by the provided functions.
+static ExecutableOp createExecutable(Location loc, StringRef executableName,
+                                     ArrayRef<FuncOp> funcOps,
+                                     ModuleOp parentModuleOp) {
+  assert(!funcOps.empty() && "must have at least one entry function");
+
+  // Create the executable that will contain the outlined region.
+  // NOTE: this will get uniquified if we have multiple in the same block.
+  OpBuilder parentModuleBuilder(&parentModuleOp.getBody()->back());
+  auto executableOp =
+      parentModuleBuilder.create<IREE::Flow::ExecutableOp>(loc, executableName);
+
+  // Create the inner ModuleOp that contains the original functions. We need
+  // to provide this shim as some ops (like std.call) look for the
+  // containing module to provide symbol resolution.
+  OpBuilder executableBuilder(executableOp);
+  executableBuilder.setInsertionPointToStart(&executableOp.getBlock());
+  auto innerModule = executableBuilder.create<ModuleOp>(loc);
+  for (auto funcOp : funcOps) {
+    innerModule.push_back(funcOp);
+  }
+
+  // Copy all reachable functions into the executable.
+  // Linker passes may dedupe these later on.
+  OpBuilder innerModuleBuilder = OpBuilder::atBlockEnd(innerModule.getBody());
+  innerModuleBuilder.setInsertionPoint(innerModule.getBody(),
+                                       ++innerModule.getBody()->begin());
+
+  return executableOp;
+}
+
 // Converts a dispatch region op into a dispatch op to the outlined region.
 static LogicalResult convertToDispatchOp(DispatchWorkgroupsOp regionOp,
                                          ExecutableOp executableOp,
@@ -173,8 +203,7 @@
 // Outlines a dispatch region into a flow.executable and replaces the region op
 // with a dispatch to that outlined executable.
 static LogicalResult outlineDispatchWorkgroupsOp(
-    std::string namePrefix, DispatchWorkgroupsOp regionOp,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
+    std::string namePrefix, DispatchWorkgroupsOp regionOp) {
   // Convert the region to a free-floating function.
   auto workgroupFuncOp =
       createWorkgroupFunc(regionOp.getLoc(), namePrefix, regionOp.body());
@@ -184,9 +213,9 @@
 
   // Create the executable with the region cloned into it.
   auto parentFuncOp = regionOp->getParentOfType<FuncOp>();
-  auto executableOp = createExecutable(
-      regionOp.getLoc(), namePrefix, {workgroupFuncOp},
-      parentFuncOp->getParentOfType<ModuleOp>(), dispatchableFuncOps);
+  auto executableOp =
+      createExecutable(regionOp.getLoc(), namePrefix, {workgroupFuncOp},
+                       parentFuncOp->getParentOfType<ModuleOp>());
   executableOp.getOperation()->moveBefore(parentFuncOp);
   executableOp.setPrivate();
 
@@ -210,14 +239,6 @@
   OutlineDispatchRegions2Pass() = default;
 
   void runOnOperation() override {
-    // Mark all functions that are dispatchable and can be moved into dispatch
-    // executables when they are called. A dispatch region using a
-    // non-dispatchable function is considered an error.
-    auto &dispatchability = getAnalysis<Dispatchability>();
-    llvm::StringMap<FuncOp> dispatchableFuncOps;
-    dispatchability.walkDispatchableOps(
-        [&](FuncOp funcOp) { dispatchableFuncOps[funcOp.getName()] = funcOp; });
-
     // Convert each dispatch region into a flow.executable + dispatch op.
     for (auto funcOp : getOperation().getOps<FuncOp>()) {
       // Outline all of the dispatch regions ops in this function.
@@ -226,8 +247,8 @@
       for (int i = 0; i < dispatchWorkgroupsOps.size(); ++i) {
         std::string namePrefix =
             funcOp.getName().str() + "_dispatch_" + std::to_string(i);
-        if (failed(outlineDispatchWorkgroupsOp(
-                namePrefix, dispatchWorkgroupsOps[i], dispatchableFuncOps))) {
+        if (failed(outlineDispatchWorkgroupsOp(namePrefix,
+                                               dispatchWorkgroupsOps[i]))) {
           return signalPassFailure();
         }
       }
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index 4d9858f..214f864 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -73,9 +73,6 @@
 static void buildHLOInputTransformPassPipeline(OpPassManager &passManager) {
   passManager.addNestedPass<FuncOp>(
       IREE::Flow::createHLOToHLOPreprocessingPass());
-  // TODO(ataei): This should run as part of createHLOToHLOPreprocessingPass
-  // which will break VMLA backend.
-  passManager.addNestedPass<FuncOp>(createDecomposeHLOClampPass());
 
   // Run passes to remove shape constraints. HLO lowering inserts them, but they
   // are not desired here.
@@ -104,8 +101,7 @@
       });
 }
 
-void buildFlowTransformPassPipeline(OpPassManager &passManager,
-                                    bool dispatchLinalgOnTensors) {
+void buildFlowTransformPassPipeline(OpPassManager &passManager) {
   //----------------------------------------------------------------------------
   // Entry dialect cleanup
   //----------------------------------------------------------------------------
@@ -202,51 +198,39 @@
       IREE::Flow::createPrePartitioningConversionPass());
   passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
 
-  if (dispatchLinalgOnTensors) {
-    // TODO(benvanik): move up to input; requires pre-partitioning conversion
-    // to be reworked first.
-    passManager.addNestedPass<FuncOp>(
-        mlir::iree_compiler::createHLOToLinalgOnTensorsPass(true));
+  // TODO(benvanik): move up to input; requires pre-partitioning conversion
+  // to be reworked first.
+  passManager.addNestedPass<FuncOp>(
+      mlir::iree_compiler::createHLOToLinalgOnTensorsPass(true));
 
-    if (clEnable1x1ConvToMatmul) {
-      passManager.addNestedPass<FuncOp>(
-          mlir::iree_compiler::createConvert1x1ConvToMatmulPass());
-    }
-    if (clEnableConvToImg2Col) {
-      passManager.addNestedPass<FuncOp>(
-          mlir::iree_compiler::createConvertConv2DToImg2ColPass());
-    }
-
+  if (clEnable1x1ConvToMatmul) {
     passManager.addNestedPass<FuncOp>(
-        mlir::createConvertElementwiseToLinalgPass());
-    passManager.addNestedPass<FuncOp>(
-        mlir::createLinalgFoldUnitExtentDimsPass());
-    passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
-    passManager.addNestedPass<FuncOp>(
-        mlir::iree_compiler::createFusionOfTensorOpsPass());
-    passManager.addNestedPass<FuncOp>(
-        IREE::Flow::createConvertToFlowTensorOpsPass());
-    passManager.addNestedPass<FuncOp>(mlir::createCSEPass());
-
-    passManager.addNestedPass<FuncOp>(
-        IREE::Flow::createDispatchLinalgOnTensorsPass());
-    // NOTE: required because the current dispatch-linalg-on-tensors pass
-    // creates a lot of dead IR that needs to be cleaned up.
-    passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
-
-    // Outline the dispatch regions into their own functions wrapped in
-    // executables.
-    passManager.addPass(IREE::Flow::createOutlineDispatchRegions2Pass());
-  } else {
-    // DEPRECATED: legacy HLO-based path.
-    passManager.addPass(IREE::Flow::createDispatchabilityAnalysisPass());
-    passManager.addNestedPass<FuncOp>(
-        IREE::Flow::createIdentifyDispatchRegions2Pass());
-    passManager.addNestedPass<FuncOp>(createCSEPass());
-    passManager.addNestedPass<FuncOp>(
-        IREE::Flow::createFoldCompatibleDispatchRegionsPass());
-    passManager.addPass(IREE::Flow::createOutlineDispatchRegionsPass());
+        mlir::iree_compiler::createConvert1x1ConvToMatmulPass());
   }
+  if (clEnableConvToImg2Col) {
+    passManager.addNestedPass<FuncOp>(
+        mlir::iree_compiler::createConvertConv2DToImg2ColPass());
+  }
+
+  passManager.addNestedPass<FuncOp>(
+      mlir::createConvertElementwiseToLinalgPass());
+  passManager.addNestedPass<FuncOp>(mlir::createLinalgFoldUnitExtentDimsPass());
+  passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
+  passManager.addNestedPass<FuncOp>(
+      mlir::iree_compiler::createFusionOfTensorOpsPass());
+  passManager.addNestedPass<FuncOp>(
+      IREE::Flow::createConvertToFlowTensorOpsPass());
+  passManager.addNestedPass<FuncOp>(mlir::createCSEPass());
+
+  passManager.addNestedPass<FuncOp>(
+      IREE::Flow::createDispatchLinalgOnTensorsPass());
+  // NOTE: required because the current dispatch-linalg-on-tensors pass
+  // creates a lot of dead IR that needs to be cleaned up.
+  passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
+
+  // Outline the dispatch regions into their own functions wrapped in
+  // executables.
+  passManager.addPass(IREE::Flow::createOutlineDispatchRegions2Pass());
 
   // Cleanup identity ops that clutter up the IR and canonicalize.
   passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
@@ -310,13 +294,7 @@
       "iree-flow-transformation-pipeline",
       "Runs the full IREE flow dialect transformation pipeline",
       [](OpPassManager &passManager) {
-        buildFlowTransformPassPipeline(passManager, false);
-      });
-  PassPipelineRegistration<> tensorTransformPassPipeline(
-      "iree-flow-transformation-pipeline-tensors",
-      "Runs the full IREE flow dialect transformation pipeline",
-      [](OpPassManager &passManager) {
-        buildFlowTransformPassPipeline(passManager, true);
+        buildFlowTransformPassPipeline(passManager);
       });
 }
 
@@ -330,8 +308,8 @@
   registerPasses();
 
   // Pipelines.
-  registerFlowTransformPassPipeline();
   registerInputTransformPassPipeline();
+  registerFlowTransformPassPipeline();
 }
 
 }  // namespace Flow
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h
index 2a64d7b..34df061 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h
@@ -52,8 +52,7 @@
 //   buildInputTransformPassPipeline
 //   buildFlowTransformPassPipeline
 //   <run conversion from flow to sequencer/hal/vm/etc>
-void buildFlowTransformPassPipeline(OpPassManager &passManager,
-                                    bool dispatchLinalgOnTensors = true);
+void buildFlowTransformPassPipeline(OpPassManager &passManager);
 
 void registerFlowTransformPassPipeline();
 
@@ -86,28 +85,14 @@
 std::unique_ptr<OperationPass<ModuleOp>> createExpandVariableDynamicDimsPass();
 
 //===----------------------------------------------------------------------===//
-// Dispatches (flow.dispatch.region)
+// Dispatches (flow.dispatch.workgroups)
 //===----------------------------------------------------------------------===//
 
 /// Pass to perform dispatch of Linalg on tensor ops by tiling and distribution.
 /// A dispatch region is created for each tiled loop nest.
 std::unique_ptr<OperationPass<FuncOp>> createDispatchLinalgOnTensorsPass();
 
-// Analyzes a module to identify which functions are dispatchable.
-// This information is cached on the module and is used by other FuncOp-scoped
-// passes to quickly access the module-level dispatchability information.
-std::unique_ptr<OperationPass<ModuleOp>> createDispatchabilityAnalysisPass();
-
-// Identifies dispatchable regions of functions and wraps them in
-// flow.dispatch_regions (version 2).
-std::unique_ptr<OperationPass<FuncOp>> createIdentifyDispatchRegions2Pass();
-
-// Folds multiple dispatch regions together that have compatible workloads.
-std::unique_ptr<OperationPass<FuncOp>>
-createFoldCompatibleDispatchRegionsPass();
-
 // Outlines dispatch regions into executables.
-std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegionsPass();
 std::unique_ptr<OperationPass<ModuleOp>> createOutlineDispatchRegions2Pass();
 
 // Injects tracing markers for dispatch operation tensor inputs and outputs.
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.td b/iree/compiler/Dialect/Flow/Transforms/Passes.td
index ad62531..a833e8a 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.td
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.td
@@ -29,12 +29,6 @@
   let constructor = "mlir::iree_compiler::IREE::Flow::createDeduplicateExecutablesPass()";
 }
 
-def DispatchabilityAnalysis :
-    Pass<"iree-flow-dispatchability-analysis", "ModuleOp"> {
-  let summary = "Analyzes functions to determine their dispatchability";
-  let constructor = "mlir::iree_compiler::IREE::Flow::createDispatchabilityAnalysisPass()";
-}
-
 def DispatchLinalgOnTensors :
     Pass<"iree-flow-dispatch-linalg-on-tensors-pass", "FuncOp"> {
   let summary = "Dispatch Linalg operations on tensors by using tile and distribute";
@@ -53,12 +47,6 @@
   let constructor = "mlir::iree_compiler::IREE::Flow::createExportBenchmarkFuncsPass()";
 }
 
-def FoldCompatibleDispatchRegions :
-    Pass<"iree-flow-fold-compatible-dispatch-regions", "FuncOp"> {
-  let summary = "Folds dispatch regions that have compatible workloads";
-  let constructor = "mlir::iree_compiler::IREE::Flow::createFoldCompatibleDispatchRegionsPass()";
-}
-
 def FormStreams :
     Pass<"iree-flow-form-streams", "FuncOp"> {
   let summary = "Identifies dispatches that can be grouped into streams within functions";
@@ -77,12 +65,6 @@
   let constructor = "mlir::iree_compiler::IREE::Flow::createHLOToHLOPreprocessingPass()";
 }
 
-def IdentifyDispatchRegions2 :
-    Pass<"iree-flow-identify-dispatch-regions2", "FuncOp"> {
-  let summary = "Conservatively identifies dispatch regions in functions (v2)";
-  let constructor = "mlir::iree_compiler::IREE::Flow::createIdentifyDispatchRegions2Pass()";
-}
-
 def InjectDispatchTracing :
     Pass<"iree-flow-inject-dispatch-tracing", "FuncOp"> {
   let summary = "Injects dispatch region tracing";
@@ -95,12 +77,6 @@
   let constructor = "mlir::iree_compiler::IREE::Flow::createLegalizeInputTypesPass()";
 }
 
-def OutlineDispatchRegions :
-    Pass<"iree-flow-outline-dispatch-regions", "ModuleOp"> {
-  let summary = "Outlines dispatch regions into standalone functions";
-  let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineDispatchRegionsPass()";
-}
-
 def OutlineDispatchRegions2 :
     Pass<"iree-flow-outline-dispatch-regions2", "ModuleOp"> {
   let summary = "Outlines dispatch regions into executables";
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/BUILD b/iree/compiler/Dialect/Flow/Transforms/test/BUILD
index 7ab73fb..a88fda9 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/BUILD
+++ b/iree/compiler/Dialect/Flow/Transforms/test/BUILD
@@ -32,17 +32,11 @@
             "dispatch_linalg_on_tensors_fusion.mlir",
             "expand_variable_dynamic_dims.mlir",
             "export_benchmark_funcs.mlir",
-            "fold_compatible_dispatch_regions.mlir",
             "form_streams.mlir",
             "hlo_to_hlo_preprocessing.mlir",
             "hlo_to_hlo_preprocessing_canoncalize_dot_general.mlir",
             "hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir",
             "hoist_unstreamable_ops.mlir",
-            "identify_dispatch_regions2_enable_matmul_fusion.mlir",
-            "identify_dispatch_regions2_hlo.mlir",
-            "identify_dispatch_regions2_linalg.mlir",
-            "identify_dispatch_regions2_shapes.mlir",
-            "identify_dispatch_regions2_std_fusion.mlir",
             "inject_dispatch_tracing.mlir",
             "legalize_input_types.mlir",
             "outline_dispatch_regions2.mlir",
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt b/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
index adb9779..2fcfbb2 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
@@ -21,17 +21,11 @@
     "dispatch_linalg_on_tensors_fusion.mlir"
     "expand_variable_dynamic_dims.mlir"
     "export_benchmark_funcs.mlir"
-    "fold_compatible_dispatch_regions.mlir"
     "form_streams.mlir"
     "hlo_to_hlo_preprocessing.mlir"
     "hlo_to_hlo_preprocessing_canoncalize_dot_general.mlir"
     "hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir"
     "hoist_unstreamable_ops.mlir"
-    "identify_dispatch_regions2_enable_matmul_fusion.mlir"
-    "identify_dispatch_regions2_hlo.mlir"
-    "identify_dispatch_regions2_linalg.mlir"
-    "identify_dispatch_regions2_shapes.mlir"
-    "identify_dispatch_regions2_std_fusion.mlir"
     "inject_dispatch_tracing.mlir"
     "legalize_input_types.mlir"
     "outline_dispatch_regions2.mlir"
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir b/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
index e1465bf..5d3545a 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
@@ -7,25 +7,6 @@
     return %0, %1 : tensor<5x5xf32>, tensor<3x5xf32>
   }
 }
-// CHECK-DAG: flow.variable @[[IN0_0:.+]] dense<{{.*}}> : tensor<5x3xf32>
-// CHECK-DAG: flow.variable @[[IN0_1:.+]] dense<{{.*}}> : tensor<3x5xf32>
-//     CHECK: func @two_dispatch_ex_dispatch_0_benchmark
-//     CHECK: %[[RES:.+]] = flow.ex.stream.fragment() : () -> tensor<5x5xf32> =
-// CHECK-DAG:   %{{.+}} = flow.variable.load @[[IN0_0]] : tensor<5x3xf32>
-// CHECK-DAG:   %{{.+}} = flow.variable.load @[[IN0_1]] : tensor<3x5xf32>
-//     CHECK:   %[[DISPATCH_RES:.+]] = flow.dispatch @two_dispatch_ex_dispatch_0::@two_dispatch_ex_dispatch_0[%{{.+}}](%{{.+}}, %{{.+}}) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
-//     CHECK:   flow.return %[[DISPATCH_RES]] : tensor<5x5xf32>
-//     CHECK: iree.do_not_optimize(%[[RES]]) : tensor<5x5xf32>
-
-// CHECK-DAG: flow.variable @[[IN1_0:.+]] dense<{{.*}}> : tensor<3x5xf32>
-// CHECK-DAG: flow.variable @[[IN1_1:.+]] dense<{{.*}}> : tensor<5x5xf32>
-//     CHECK: func @two_dispatch_ex_dispatch_1_benchmark
-//     CHECK: %[[RES:.+]] = flow.ex.stream.fragment() : () -> tensor<3x5xf32>
-// CHECK-DAG:   %{{.+}} = flow.variable.load @[[IN1_0]] : tensor<3x5xf32>
-// CHECK-DAG:   %{{.+}} = flow.variable.load @[[IN1_1]] : tensor<5x5xf32>
-//     CHECK:   %[[DISPATCH_RES:.+]] = flow.dispatch @two_dispatch_ex_dispatch_1::@two_dispatch_ex_dispatch_1[%{{.+}}](%{{.+}}, %{{.+}}) : (tensor<3x5xf32>, tensor<5x5xf32>) -> tensor<3x5xf32>
-//     CHECK:   flow.return %[[DISPATCH_RES]] : tensor<3x5xf32>
-//     CHECK: iree.do_not_optimize(%[[RES]]) : tensor<3x5xf32>
 
 // CHECK-DAG: flow.variable @[[MAIN_IN_0:.+]] dense<{{.*}}> : tensor<5x3xf32>
 // CHECK-DAG: flow.variable @[[MAIN_IN_1:.+]] dense<{{.*}}> : tensor<3x5xf32>
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
deleted file mode 100644
index d7ec93e..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
+++ /dev/null
@@ -1,160 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-fold-compatible-dispatch-regions %s | IreeFileCheck %s
-
-func @noFolding(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %cst = constant 4 : index
-  %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-    %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-    flow.return %1 : tensor<4xf32>
-  }
-  return %0 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @noFolding
-// CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 4 : index
-// CHECK-NEXT: %0 = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-// CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-// CHECK-NEXT:   flow.return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4xf32>
-
-// -----
-
-func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %cst = constant 4 : index
-  %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-    %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-    flow.return %1 : tensor<4xf32>
-  }
-  %2 = flow.dispatch.region[%cst : index](%arg2 = %arg0 : tensor<4xf32>, %arg3 = %0 : tensor<4xf32>) -> tensor<4xf32> {
-    %3 = mhlo.subtract %arg3, %arg2 : tensor<4xf32>
-    flow.return %3 : tensor<4xf32>
-  }
-  %4 = flow.dispatch.region[%cst : index](%arg4 = %arg0 : tensor<4xf32>, %arg5 = %2 : tensor<4xf32>) -> tensor<4xf32> {
-    %5 = mhlo.multiply %arg4, %arg5 : tensor<4xf32>
-    flow.return %5 : tensor<4xf32>
-  }
-  return %4 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @elementwiseOps
-// CHECK: %[[WORKLOAD0:.+]] = constant 4
-// CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-// CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-// CHECK-NEXT:   %2 = mhlo.subtract %1, %arg1 : tensor<4xf32>
-// CHECK-NEXT:   %3 = mhlo.multiply %arg1, %2 : tensor<4xf32>
-// CHECK-NEXT:   flow.return %3 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK: return %[[R0]] : tensor<4xf32>
-
-// -----
-
-func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %cst = constant 16 : index
-  %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
-    flow.return %3 : tensor<4x4xf32>
-  }
-  %cst_0 = constant 16 : index
-  %1 = flow.dispatch.region[%cst_0 : index](%arg1 = %0 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-    flow.return %3 : tensor<4x4xf32>
-  }
-  %cst_1 = constant 16 : index
-  %2 = flow.dispatch.region[%cst_1 : index](%arg1 = %1 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
-    flow.return %3 : tensor<4x4xf32>
-  }
-  return %2 : tensor<4x4xf32>
-}
-
-// CHECK-LABEL: func @interleavedDot
-// CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT:   %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
-// CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[WORKLOAD1:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region[%[[WORKLOAD1]] : index](%arg1 = %[[R0]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT:   %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[WORKLOAD2:.+]] = constant 16 : index
-// CHECK-NEXT: %[[R2:.+]] = flow.dispatch.region[%[[WORKLOAD2]] : index](%arg1 = %[[R1]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> (tensor<4x4xf32>) {
-// CHECK-NEXT:   %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
-// CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %[[R2]] : tensor<4x4xf32>
-
-// -----
-
-module {
-  flow.variable @var1 dense<1.000000e+00> : tensor<4xf32>
-  flow.variable @var2 dense<2.000000e+00> : tensor<4xf32>
-  func @notDominate() -> tensor<4xf32> {
-    %c4 = constant 4 : index
-    %0 = flow.variable.load @var1 : tensor<4xf32>
-    %1 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>) -> tensor<4xf32> {
-      %4 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-      flow.return %4 : tensor<4xf32>
-    }
-    %2 = flow.variable.load @var2 : tensor<4xf32>
-    %3 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>, %arg1 = %2 : tensor<4xf32>) -> tensor<4xf32> {
-      %4 = mhlo.subtract %arg1, %arg0 : tensor<4xf32>
-      flow.return %4 : tensor<4xf32>
-    }
-    return %3 : tensor<4xf32>
-  }
-}
-// CHECK-LABEL: func @notDominate
-//       CHECK: flow.dispatch.region
-//       CHECK: flow.dispatch.region
-
-// -----
-
-module {
-  flow.variable @var1 dense<1.000000e+00> : tensor<4xf32>
-  flow.variable @var2 dense<2.000000e+00> : tensor<4xf32>
-  func @dominate() -> tensor<4xf32> {
-    %c4 = constant 4 : index
-    %0 = flow.variable.load @var1 : tensor<4xf32>
-    %1 = flow.variable.load @var2 : tensor<4xf32>
-    %2 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>) -> tensor<4xf32> {
-      %4 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-      flow.return %4 : tensor<4xf32>
-    }
-    %3 = flow.dispatch.region[%c4 : index](%arg0 = %0 : tensor<4xf32>, %arg1 = %1 : tensor<4xf32>) -> tensor<4xf32> {
-      %4 = mhlo.subtract %arg1, %arg0 : tensor<4xf32>
-      flow.return %4 : tensor<4xf32>
-    }
-    return %3 : tensor<4xf32>
-  }
-}
-// CHECK-LABEL: func @dominate
-//       CHECK: flow.dispatch.region
-//   CHECK-NOT: flow.dispatch.region
-
-// -----
-
-module {
-  func @torch_index_select_producer(%arg0: tensor<5x1x5xi32>,
-                                    %arg1: tensor<2xi32>) -> tensor<2x1x5xi32> {
-    %c10 = constant 0 : index
-    %0 = flow.dispatch.region[%c10 : index](%arg2 = %arg0 : tensor<5x1x5xi32>,
-                                            %arg3 = %arg1 : tensor<2xi32>) -> tensor<2x1x5xi32> {
-      %1 = "mhlo.torch_index_select"(%arg2, %arg3) {
-        dim = 0 : i64,
-        batch_dims = 0 : i64
-      } : (tensor<5x1x5xi32>, tensor<2xi32>) -> tensor<2x1x5xi32>
-      flow.return %1 : tensor<2x1x5xi32>
-    }
-    %1 = flow.dispatch.region[%c10 : index](%arg2 = %0 : tensor<2x1x5xi32>) -> tensor<2x1x5xi32> {
-      %2 = mhlo.add %arg2, %arg2 : tensor<2x1x5xi32>
-      flow.return %2 : tensor<2x1x5xi32>
-    }
-    return %1 : tensor<2x1x5xi32>
-  }
-}
-// CHECK-LABEL: func @torch_index_select_producer
-//       CHECK: flow.dispatch.region
-//  CHECK-NEXT:   mhlo.torch_index_select
-//  CHECK-NEXT:   mhlo.add
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir
deleted file mode 100644
index ae87444..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_enable_matmul_fusion.mlir
+++ /dev/null
@@ -1,187 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 -iree-enable-consumer-only-fusion -canonicalize %s | IreeFileCheck %s
-
-func @simpleDotAddMul
-  (%arg0 : tensor<16x32xf32>, %arg1 : tensor<32x48xf32>,
-   %arg2 : tensor<16x48xf32>, %arg3 : tensor<16x48xf32>) -> tensor<16x48xf32> {
-  %0 = "mhlo.dot"(%arg0, %arg1) :
-    (tensor<16x32xf32>, tensor<32x48xf32>) -> tensor<16x48xf32>
-  %1 = mhlo.add %0, %arg2 : tensor<16x48xf32>
-  %2 = mhlo.multiply %1, %arg3 : tensor<16x48xf32>
-  return %2 : tensor<16x48xf32>
-}
-// CHECK-LABEL: func @simpleDotAddMul
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<16x32xf32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x48xf32>
-//  CHECK-SAME:   %[[ARG2:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-//  CHECK-NEXT:   %[[WORKLOAD:.+]] = constant 768
-//  CHECK-NEXT:   %[[RESULT:.+]] = flow.dispatch.region[%[[WORKLOAD]] : index]
-//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG1]]
-//  CHECK-SAME:     %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG2]]
-//  CHECK-SAME:     %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG3]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T1:.+]] = "mhlo.dot"(%[[ARG4]], %[[ARG5]])
-//  CHECK-NEXT:       %[[T2:.+]] = mhlo.add %[[T1]], %[[ARG6]]
-//  CHECK-NEXT:       %[[T3:.+]] = mhlo.multiply %[[T2]], %[[ARG7]]
-//  CHECK-NEXT:       flow.return %[[T3]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   return %[[RESULT]]
-
-// -----
-
-func @twoDots
-  (%arg0 : tensor<16x32xf32>, %arg1 : tensor<32x48xf32>,
-   %arg2 : tensor<16x48xf32>, %arg3 : tensor<16x64xf32>,
-   %arg4 : tensor<16x64xf32>) -> tensor<16x64xf32> {
-  %0 = "mhlo.dot"(%arg0, %arg1) :
-    (tensor<16x32xf32>, tensor<32x48xf32>) -> tensor<16x48xf32>
-  %1 = mhlo.add %0, %arg2 : tensor<16x48xf32>
-  %2 = "mhlo.dot"(%1, %arg3) :
-    (tensor<16x48xf32>, tensor<16x64xf32>) -> tensor<16x64xf32>
-  %3 = mhlo.multiply %2, %arg4 : tensor<16x64xf32>
-  return %3 : tensor<16x64xf32>
-}
-// CHECK-LABEL: func @twoDots
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<16x32xf32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x48xf32>
-//  CHECK-SAME:   %[[ARG2:[a-zA-Z0-9_]+]]: tensor<16x48xf32>
-//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<16x64xf32>
-//  CHECK-SAME:   %[[ARG4:[a-zA-Z0-9_]+]]: tensor<16x64xf32>
-//  CHECK-NEXT:   %[[WORKLOAD1:.+]] = constant 1024
-//  CHECK-NEXT:   %[[WORKLOAD2:.+]] = constant 768
-//  CHECK-NEXT:   %[[RESULT1:.+]] = flow.dispatch.region[%[[WORKLOAD2]] : index]
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:     %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG1]]
-//  CHECK-SAME:     %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG2]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T1:.+]] = "mhlo.dot"(%[[ARG5]], %[[ARG6]])
-//  CHECK-NEXT:       %[[T2:.+]] = mhlo.add %[[T1]], %[[ARG7]]
-//  CHECK-NEXT:       flow.return %[[T2]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   %[[RESULT2:.+]] = flow.dispatch.region[%[[WORKLOAD1]] : index]
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-//  CHECK-SAME:     %[[ARG6:[a-zA-Z0-9_]+]] = %[[ARG3]]
-//  CHECK-SAME:     %[[ARG7:[a-zA-Z0-9_]+]] = %[[ARG4]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.dot"(%[[ARG5]], %[[ARG6]])
-//  CHECK-NEXT:       %[[T4:.+]] = mhlo.multiply %[[T3]], %[[ARG7]]
-//  CHECK-NEXT:       flow.return %[[T4]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   return %[[RESULT2]]
-
-// -----
-
-func @moveDispatchOp
-  (%arg0 : tensor<1x384x384xf32>, %arg1 : tensor<384x512xf32>,
-   %arg2 : tensor<512xf32>) -> tensor<1x384x512xf32> {
-  %0 = "mhlo.reshape"(%arg0) : (tensor<1x384x384xf32>) -> tensor<384x384xf32>
-  %1 = "mhlo.dot"(%0, %arg1) :
-    (tensor<384x384xf32>, tensor<384x512xf32>) -> tensor<384x512xf32>
-  %2 = "mhlo.broadcast_in_dim"(%arg2)
-    {broadcast_dimensions = dense<1> : tensor<1xi64>} :
-    (tensor<512xf32>) -> tensor<384x512xf32>
-  %3 = mhlo.add %1, %2 : tensor<384x512xf32>
-  %4 = "mhlo.reshape"(%3) : (tensor<384x512xf32>) -> tensor<1x384x512xf32>
-  return %4 : tensor<1x384x512xf32>
-}
-// CHECK-LABEL: func @moveDispatchOp
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<1x384x384xf32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<384x512xf32>
-//  CHECK-SAME:   %[[ARG2:[a-zA-Z0-9_]+]]: tensor<512xf32>
-//       CHECK:   %[[RESULT1:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG2]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T1:.+]] = "mhlo.broadcast_in_dim"(%[[ARG3]])
-//  CHECK-NEXT:       flow.return %[[T1]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   %[[RESULT2:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG1]]
-//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T2:.+]] = "mhlo.reshape"(%[[ARG5]])
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.dot"(%[[T2]], %[[ARG3]])
-//  CHECK-NEXT:       %[[T4:.+]] = mhlo.add %[[T3]], %[[ARG4]]
-//  CHECK-NEXT:       %[[T5:.+]] = "mhlo.reshape"(%[[T4]])
-//  CHECK-NEXT:       flow.return %[[T5]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   return %[[RESULT2]]
-
-// -----
-
-func @dot_fusion_with_different_shape
-  (%arg0: tensor<384x512xf32>, %arg1: tensor<512x128xf32>,
-   %arg2: tensor<384x128xf32>) -> tensor<4x384x32xf32> {
-  %0 = "mhlo.dot"(%arg0, %arg1)
-    : (tensor<384x512xf32>, tensor<512x128xf32>) -> tensor<384x128xf32>
-  %1 = mhlo.add %0, %arg2 : tensor<384x128xf32>
-  %2 = "mhlo.reshape"(%1) : (tensor<384x128xf32>) -> tensor<1x384x4x32xf32>
-  %3 = "mhlo.transpose"(%2) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>}
-    : (tensor<1x384x4x32xf32>) -> tensor<1x4x384x32xf32>
-  %4 = "mhlo.reshape"(%3) : (tensor<1x4x384x32xf32>) -> tensor<4x384x32xf32>
-  return %4 : tensor<4x384x32xf32>
-}
-
-// CHECK-LABEL: func @dot_fusion_with_different_shape
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<384x512xf32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<512x128xf32>
-//  CHECK-SAME:   %[[ARG2:[a-zA-Z0-9_]+]]: tensor<384x128xf32>
-//       CHECK:   %[[RESULT1:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]] = %[[ARG1]]
-//  CHECK-SAME:     %[[ARG5:[a-zA-Z0-9_]+]] = %[[ARG2]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T2:.+]] = "mhlo.dot"(%[[ARG3]], %[[ARG4]])
-//  CHECK-NEXT:       %[[T3:.+]] = mhlo.add %[[T2]], %[[ARG5]]
-//  CHECK-NEXT:       %[[T4:.+]] = "mhlo.reshape"(%[[T3]])
-//  CHECK-NEXT:       flow.return %[[T4]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   %[[RESULT2:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T2:.+]] = "mhlo.transpose"(%[[ARG3]])
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.reshape"(%[[T2]])
-//  CHECK-NEXT:       flow.return %[[T3]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   return %[[RESULT2]]
-
-// -----
-
-func @dot_general_lower_swapped
-  (%arg0 : tensor<2x3xf32>, %arg1 : tensor<1x1x2xf32>) -> tensor<3x1x1xf32> {
-  %0 = "mhlo.transpose"(%arg0) {permutation = dense<[1, 0]> : tensor<2xi64>}
-    : (tensor<2x3xf32>) -> tensor<3x2xf32>
-  %1 = "mhlo.transpose"(%arg1) {permutation = dense<[2, 0, 1]> : tensor<3xi64>}
-    : (tensor<1x1x2xf32>) -> tensor<2x1x1xf32>
-  %2 = "mhlo.reshape"(%1) : (tensor<2x1x1xf32>) -> tensor<2x1xf32>
-  %3 = "mhlo.dot"(%0, %2) {precision_config = ["DEFAULT", "DEFAULT"]}
-    : (tensor<3x2xf32>, tensor<2x1xf32>) -> tensor<3x1xf32>
-  %4 = "mhlo.reshape"(%3) : (tensor<3x1xf32>) -> tensor<3x1x1xf32>
-  return %4 : tensor<3x1x1xf32>
-}
-// CHECK-LABEL: func @dot_general_lower_swapped
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x3xf32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1x1x2xf32>
-//       CHECK:   %[[RESULT1:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.transpose"(%[[ARG2]])
-//  CHECK-NEXT:       flow.return %[[T3]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   %[[RESULT2:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]] = %[[ARG1]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.transpose"(%[[ARG2]])
-//  CHECK-NEXT:       flow.return %[[T3]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   %[[RESULT3:.+]] = flow.dispatch.region
-//  CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]] = %[[RESULT1]]
-//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]] = %[[RESULT2]]
-//  CHECK-SAME:     {
-//  CHECK-NEXT:       %[[T3:.+]] = "mhlo.reshape"(%[[ARG3]])
-//  CHECK-NEXT:       %[[T4:.+]] = "mhlo.dot"(%[[ARG2]], %[[T3]])
-//  CHECK-NEXT:       %[[T5:.+]] = "mhlo.reshape"(%[[T4]])
-//  CHECK-NEXT:       flow.return %[[T5]]
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   return %[[RESULT3]]
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
deleted file mode 100644
index eb1e8e4..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
+++ /dev/null
@@ -1,165 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// CHECK-LABEL: @simpleMath
-func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4
-  // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
-  // CHECK-SAME: [%[[WORKLOAD]] : index]
-  // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-  // CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
-  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-  // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
-  // CHECK-NEXT: }
-  // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @isolatedDot
-func @isolatedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  // NOTE: Fragile ordering. Workload constants are emitted in order at the
-  // top of the block.
-  // CHECK: flow.dispatch.region
-  // CHECK:   mhlo.add
-  // CHECK: flow.dispatch.region
-  // CHECK:   "mhlo.dot"
-  // CHECK: flow.dispatch.region
-  // CHECK:   mhlo.multiply
-  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
-  return %2 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @sameBenefit
-func @sameBenefit(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // Because these are all the same benefit, initial formation puts them each
-  // in their own region.
-  // CHECK: flow.dispatch.region
-  // CHECK:   mhlo.add
-  // CHECK: flow.dispatch.region
-  // CHECK:   call @callee
-  // CHECK: flow.dispatch.region
-  // CHECK:   mhlo.multiply
-  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
-  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK: %[[WORKLOAD0:.+]] = constant 4 : index
-  // CHECK: %[[R0:.+]] = flow.dispatch.region
-  // CHECK-SAME: [%[[WORKLOAD0]] : index]
-  // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-  // CHECK-NEXT:   %1 = mhlo.multiply %arg1, %arg1 : tensor<4xf32>
-  %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
-  // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
-  // CHECK-NEXT: }
-  // CHECK: return %[[R0]] : tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @copyAdd
-func @copyAdd(%arg0 : tensor<4xf32>) -> tensor<4x16xf32> {
-  // Because these are all the same benefit, initial formation puts them each
-  // in their own region.
-  // CHECK: flow.dispatch.region
-  // CHECK:      "mhlo.broadcast_in_dim"
-  // CHECK-NEXT: mhlo.add
-  %0 = "mhlo.broadcast_in_dim"(%arg0) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor<4xf32>) -> tensor<4x16xf32>
-  %1 = mhlo.add %0, %0 : tensor<4x16xf32>
-  return %1 : tensor<4x16xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @single_reduction
-func @single_reduction(%arg0 : tensor<4x8xf32>) -> tensor<4xf32> {
-  // CHECK-DAG: %[[INITIAL:.+]] = constant dense<0.000000e+00>
-  %0 = constant dense<0.000000e+00> : tensor<f32>
-  // CHECK-DAG: %[[WORKLOAD0:.+]] = constant 4 : index
-  // CHECK: %[[RESULT:.+]] = flow.dispatch.region
-  // CHECK-SAME: [%[[WORKLOAD0]] : index]
-  // CHECK-SAME: (%arg1 = %arg0 : tensor<4x8xf32>) -> (tensor<4xf32>)
-  // CHECK-NEXT: %[[CST_0:.+]] = constant dense<0.0
-  // CHECK-NEXT: = "mhlo.reduce"(%arg1, %[[CST_0]])
-  %1 = "mhlo.reduce"(%arg0, %0) ( {
-  ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
-    %2 = mhlo.add %arg1, %arg2 : tensor<f32>
-    "mhlo.return"(%2) : (tensor<f32>) -> ()
-  }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-  // CHECK: flow.return
-  // CHECK: return %[[RESULT]] : tensor<4xf32>
-  return %1 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @multi_reduction
-func @multi_reduction(%arg0 : tensor<4x8xf32>, %arg1 : tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
-  // CHECK-DAG: %[[INITIALA:.+]] = constant dense<0.000000e+00>
-  %0 = constant dense<0.000000e+00> : tensor<f32>
-  // CHECK-DAG: %[[INITIALB:.+]] = constant dense<1.000000e+00>
-  %1 = constant dense<1.000000e+00> : tensor<f32>
-  // CHECK-DAG: %[[WORKLOAD0:.+]] = constant 4 : index
-  // CHECK: %[[RESULT:.+]]:2 = flow.dispatch.region
-  // CHECK-SAME: [%[[WORKLOAD0]] : index]
-  // CHECK-SAME: (%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK-NEXT: %[[CST_0:.+]] = constant dense<0.0
-  // CHECK-NEXT: %[[CST_1:.+]] = constant dense<1.0
-  // CHECK-NEXT: = "mhlo.reduce"(%arg2, %arg3, %[[CST_0]], %[[CST_1]])
-  %2, %3 = "mhlo.reduce"(%arg0, %arg1, %0, %1) ( {
-  ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
-    %4 = mhlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
-    %5 = mhlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
-    "mhlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
-  }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK: flow.return
-  // CHECK: return %[[RESULT]]#0, %[[RESULT]]#1 : tensor<4xf32>, tensor<4xf32>
-  return %2, %3 : tensor<4xf32>, tensor<4xf32>
-}
-
-// TODO(benvanik): windowed reduction.
-
-// -----
-
-// CHECK-LABEL: @clone_broadcast
-func @clone_broadcast(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %splatCst = constant dense<1.0> : tensor<f32>
-  // CHECK: flow.dispatch.region
-  // CHECK:     "mhlo.broadcast"
-  // CHECK:     mhlo.add
-  // CHECK: flow.dispatch.region
-  // CHECK:     "mhlo.dot"
-  // CHECK: flow.dispatch.region
-  // CHECK:     "mhlo.broadcast"
-  // CHECK:     mhlo.add
-  %0 = "mhlo.broadcast"(%splatCst) {broadcast_sizes = dense<[4, 4]> : tensor<2xi64>} : (tensor<f32>) -> tensor<4x4xf32>
-  %1 = "mhlo.add"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = "mhlo.dot"(%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %3 = "mhlo.add"(%0, %2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  return %3: tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @reshaped_dot
-func @reshaped_dot(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: flow.dispatch.region
-  // CHECK:     "mhlo.reshape"
-  // CHECK:     "mhlo.reshape"
-  // CHECK:     "mhlo.dot"
-  // CHECK:     "mhlo.reshape"
-  %0 = "mhlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
-  %1 = "mhlo.reshape"(%arg1) : (tensor<16xf32>) -> tensor<4x4xf32>
-  %2 = "mhlo.dot"(%0, %1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %3 = "mhlo.reshape"(%2) : (tensor<4x4xf32>) -> tensor<16xf32>
-  return %3 : tensor<16xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir
deleted file mode 100644
index 1290461..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_linalg.mlir
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-func @constant_capture(%arg0 : tensor<10x20xf32>) -> tensor<10x20xf32> {
-  %cst1 = constant 1.0 : f32
-  %cst2 = constant dense<2.0> : tensor<10x20xf32>
-  %cst3 = constant dense<
-    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]> : tensor<10xf32>
-  %0 = linalg.init_tensor [10, 20] : tensor<10x20xf32>
-  %1 = linalg.generic
-    {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                      affine_map<(d0, d1) -> (d0, d1)>,
-                      affine_map<(d0, d1) -> (d0)>,
-                      affine_map<(d0, d1) -> (d0, d1)>],
-     iterator_types = ["parallel", "parallel"]}
-    ins(%arg0, %cst2, %cst3
-      : tensor<10x20xf32>, tensor<10x20xf32>, tensor<10xf32>)
-    outs(%0 : tensor<10x20xf32>) {
-    ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : f32, %arg4 : f32):
-      %1 = addf %arg1, %cst1 : f32
-      %2 = mulf %1, %arg2 : f32
-      %3 = addf %2, %arg3 : f32
-      linalg.yield %3 : f32
-    } -> tensor<10x20xf32>
-  return %1 : tensor<10x20xf32>
-}
-//       CHECK: func @constant_capture
-// CHECK-SAME:    %[[ARG0:[a-zA-Z0-9_]+]]: tensor<10x20xf32>
-//   CHECK-DAG:   %[[CST3:.+]] = constant dense<[1.000000e+00, 2.000000e+00,
-//  CHECK-SAME:     3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00,
-//  CHECK-SAME:     7.000000e+00, 8.000000e+00, 9.000000e+00, 1.000000e+01]>
-//       CHECK:   %[[RESULT:.+]] = flow.dispatch.region[%{{.+}} : index](
-//  CHECK-SAME:       %[[ARG1:[a-zA-Z0-9_]+]] = %[[ARG0]]
-//  CHECK-SAME:       %[[ARG2:[a-zA-Z0-9_]+]] = %[[CST3]]
-//  CHECK-SAME:     ) -> (tensor<10x20xf32>) {
-//   CHECK-DAG:   %[[CST1:.+]] = constant 1.000000e+00 : f32
-//   CHECK-DAG:   %[[CST2:.+]] = constant dense<2.000000e+00> : tensor<10x20xf32>
-//   CHECK-DAG:     %[[T0:.+]] = linalg.init_tensor [10, 20] : tensor<10x20xf32>
-//       CHECK:     %[[RETURN:.+]] = linalg.generic
-//  CHECK-SAME:       ins(%[[ARG1]], %[[CST2]], %[[ARG2]]
-//  CHECK-SAME:       ) outs(%[[T0]] : tensor<10x20xf32>) {
-//  CHECK-NEXT:       ^{{[a-zA-Z0-9]+}}(
-//  CHECK-SAME:         %[[ARG3:.[a-zA-Z0-9_]+]]: f32,
-//  CHECK-SAME:         %[[ARG4:.[a-zA-Z0-9_]+]]: f32,
-//  CHECK-SAME:         %[[ARG5:.[a-zA-Z0-9_]+]]: f32,
-//  CHECK-SAME:         %[[ARG6:.[a-zA-Z0-9_]+]]: f32)
-//       CHECK:         %[[T0:.+]] = addf %[[ARG3]], %[[CST1]]
-//       CHECK:         %[[T1:.+]] = mulf %[[T0]], %[[ARG4]]
-//       CHECK:         %[[T2:.+]] = addf %[[T1]], %[[ARG5]]
-//       CHECK:         linalg.yield %[[T2]]
-//       CHECK:       }
-//       CHECK:     flow.return %[[RETURN]]
-//       CHECK:   }
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
deleted file mode 100644
index 4212eaf..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
+++ /dev/null
@@ -1,32 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// -----
-// CHECK-LABEL: @singleDispatchWithShapes
-// CHECK-SAME: %[[A0:[^:[:space:]]+]]: tensor<?x4xf32>,
-// CHECK-SAME: %[[A1:[^:[:space:]]+]]: !shapex.ranked_shape<[?,4]>,
-// CHECK-SAME: %[[A2:[^:[:space:]]+]]: !shapex.ranked_shape<[?,4]>
-func @singleDispatchWithShapes(%arg0 : tensor<?x4xf32>,
-    %arg1 : !shapex.ranked_shape<[?,4]>, %arg2 : !shapex.ranked_shape<[?,4]>) -> tensor<?x4xf32> {
-  // Lead-in tie_shape should be preserved outside of the dispatch region.
-  // CHECK: %[[TS0:.+]] = shapex.tie_shape %[[A0]], %[[A1]]
-  %0 = shapex.tie_shape %arg0, %arg1 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-  // Fragility: The order of CA? derives from the algorithm and is
-  // otherwise not load bearing. Since they appear on a single line, this is
-  // difficult to make generic.
-  // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[UNUSED_WORKLOAD:.+]] : index](
-  // CHECK-SAME: %[[CA2:.+]] = %[[A2]] : !shapex.ranked_shape<[?,4]>,
-  // CHECK-SAME: %[[CA0:.+]] = %{{.+}} : tensor<?x4xf32>,
-  // CHECK-SAME: %[[CA1:.+]] = %[[A1]] : !shapex.ranked_shape<[?,4]>)
-    // Dispatch region should contain captured tie_shapes.
-    // CHECK: %[[R1:.+]] = shapex.tie_shape %[[CA0]], %[[CA1]]
-    // CHECK: %[[R2:.+]] = mhlo.add %[[R1]], %[[R1]]
-    // CHECK: %[[R3:.+]] = shapex.tie_shape %[[R2]], %[[CA2]]
-    // CHECK: flow.return %[[R3]]
-  %1 = mhlo.add %0, %0 : tensor<?x4xf32>
-  %2 = shapex.tie_shape %1, %arg2 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-
-  // Lead-out tie_shape should be preserved outside of the dispatch region.
-  // CHECK: %[[R4:.+]] = shapex.tie_shape %[[R0]], %[[A2]]
-  // CHECK: return %[[R4]]
-  return %2 : tensor<?x4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir
deleted file mode 100644
index b7219bd..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_std_fusion.mlir
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-dispatchability-analysis -iree-flow-identify-dispatch-regions2 %s | IreeFileCheck %s
-
-// CHECK-LABEL: @empty
-func @empty() {
-  // CHECK-NEXT: return
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @stdElementwiseOps
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT: %[[WORKLOAD:.+]] = constant 4
-  // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
-  // CHECK-SAME: [%[[WORKLOAD]] : index]
-  // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> (tensor<4xf32>) {
-  // CHECK-NEXT:   %1 = addf %arg1, %arg1 : tensor<4xf32>
-  %0 = addf %arg0, %arg0 : tensor<4xf32>
-  // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
-  // CHECK-NEXT: }
-  // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
index 64c5f4b..1feda19 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
@@ -8,36 +8,6 @@
 
 // -----
 
-func @stdElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = addf %arg0, %arg0 : tensor<4xf32>
-  %1 = subf %0, %arg0 : tensor<4xf32>
-  %2 = mulf %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable @stdElementwiseOps_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT:   flow.dispatch.entry @stdElementwiseOps_ex_dispatch_0
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @stdElementwiseOps_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:       %0 = addf %arg0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %1 = subf %0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %2 = mulf %1, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       return %2 : tensor<4xf32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @stdElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
-// CHECK-NEXT:       (%arg1: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:     %[[WORKLOAD:.+]] = constant 4 : index
-// CHECK-NEXT:     %1 = flow.dispatch @stdElementwiseOps_ex_dispatch_0::@stdElementwiseOps_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
-// CHECK-NEXT:     flow.return %1 : tensor<4xf32>
-// CHECK-NEXT:   }
-// CHECK-NEXT:   return %0 : tensor<4xf32>
-// CHECK-NEXT: }
-
-// -----
-
 func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
   %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
@@ -45,26 +15,24 @@
   return %2 : tensor<4xf32>
 }
 
-// CHECK-LABEL: flow.executable @hloElementwiseOps_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT:   flow.dispatch.entry @hloElementwiseOps_ex_dispatch_0
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @hloElementwiseOps_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       return %2 : tensor<4xf32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
-// CHECK-NEXT:       (%arg1: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:     %[[WORKLOAD:.+]] = constant 4 : index
-// CHECK-NEXT:     %1 = flow.dispatch @hloElementwiseOps_ex_dispatch_0::@hloElementwiseOps_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
-// CHECK-NEXT:     flow.return %1 : tensor<4xf32>
-// CHECK-NEXT:   }
-// CHECK-NEXT:   return %0 : tensor<4xf32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @hloElementwiseOps_dispatch_0 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT:   flow.dispatch.entry @hloElementwiseOps_dispatch_0
+//  CHECK-NEXT:   module {
+//  CHECK-NEXT:     func @hloElementwiseOps_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<writeonly:4xf32>) {
+//       CHECK:       %{{.+}} = linalg.generic
+//       CHECK:         %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+//  CHECK-NEXT:         %{{.+}} = subf %{{.+}}, %{{.+}} : f32
+//  CHECK-NEXT:         %{{.+}} = mulf %{{.+}}, %{{.+}} : f32
+//       CHECK: func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+//  CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4xf32>) -> tensor<4xf32> =
+//  CHECK-NEXT:       (%arg1: tensor<4xf32>) -> tensor<4xf32> {
+//   CHECK-DAG:     %[[C1:.+]] = constant 1 : index
+//   CHECK-DAG:     %[[C4:.+]] = constant 4 : index
+//  CHECK-NEXT:     %1 = flow.dispatch @hloElementwiseOps_dispatch_0::@hloElementwiseOps_dispatch_0[%[[C4]], %[[C1]], %[[C1]]](%arg1) : (tensor<4xf32>) -> tensor<4xf32>
+//  CHECK-NEXT:     flow.return %1 : tensor<4xf32>
+//  CHECK-NEXT:   }
+//  CHECK-NEXT:   return %0 : tensor<4xf32>
+//  CHECK-NEXT: }
 
 // -----
 
@@ -75,44 +43,35 @@
   return %2 : tensor<4x4xf32>
 }
 
-// CHECK-LABEL: flow.executable @interleavedDot_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_0
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @interleavedDot_ex_dispatch_0(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
-// CHECK-NEXT:       return %0 : tensor<4x4xf32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: flow.executable @interleavedDot_ex_dispatch_1 attributes {sym_visibility = "private"} {
-// CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_1
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @interleavedDot_ex_dispatch_1(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT:       return %0 : tensor<4x4xf32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: flow.executable @interleavedDot_ex_dispatch_2 attributes {sym_visibility = "private"} {
-// CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_2
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @interleavedDot_ex_dispatch_2(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = mhlo.multiply %arg0, %arg1 : tensor<4x4xf32>
-// CHECK-NEXT:       return %0 : tensor<4x4xf32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @interleavedDot(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x4xf32>) -> tensor<4x4xf32> =
-// CHECK-NEXT:        (%arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:     %[[WORKLOAD:.+]] = constant 16 : index
-// CHECK-NEXT:     %1 = flow.dispatch @interleavedDot_ex_dispatch_0::@interleavedDot_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT:     %2 = flow.dispatch @interleavedDot_ex_dispatch_1::@interleavedDot_ex_dispatch_1[%[[WORKLOAD]]](%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT:     %3 = flow.dispatch @interleavedDot_ex_dispatch_2::@interleavedDot_ex_dispatch_2[%[[WORKLOAD]]](%2, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-// CHECK-NEXT:     flow.return %3 : tensor<4x4xf32>
-// CHECK-NEXT:   }
-// CHECK-NEXT:   return %0 : tensor<4x4xf32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @interleavedDot_dispatch_0 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT:   flow.dispatch.entry @interleavedDot_dispatch_0
+//  CHECK-NEXT:   module {
+//  CHECK-NEXT:     func @interleavedDot_dispatch_0
+//       CHECK:       %{{.+}} = linalg.generic
+//       CHECK:         %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+//       CHECK: flow.executable @interleavedDot_dispatch_1 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT:   flow.dispatch.entry @interleavedDot_dispatch_1
+//  CHECK-NEXT:   module {
+//  CHECK-NEXT:     func @interleavedDot_dispatch_1
+//       CHECK:       %{{.+}} = linalg.matmul
+//       CHECK: flow.executable @interleavedDot_dispatch_2 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT:   flow.dispatch.entry @interleavedDot_dispatch_2
+//  CHECK-NEXT:   module {
+//  CHECK-NEXT:     func @interleavedDot_dispatch_2
+//       CHECK:       %{{.+}} = linalg.generic
+//       CHECK:         %{{.+}} = mulf %{{.+}}, %{{.+}} : f32
+//       CHECK: func @interleavedDot(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+//  CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x4xf32>) -> tensor<4x4xf32> =
+//  CHECK-NEXT:        (%arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
+//   CHECK-DAG:     %[[C1:.+]] = constant 1 : index
+//   CHECK-DAG:     %[[C4:.+]] = constant 4 : index
+//  CHECK-NEXT:     %1 = flow.dispatch @interleavedDot_dispatch_0::@interleavedDot_dispatch_0[%[[C4]], %[[C4]], %[[C1]]](%arg1) : (tensor<4x4xf32>) -> tensor<4x4xf32>
+//  CHECK-NEXT:     %2 = flow.dispatch @interleavedDot_dispatch_1::@interleavedDot_dispatch_1[%[[C4]], %[[C4]], %[[C1]]](%1, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+//  CHECK-NEXT:     %3 = flow.dispatch @interleavedDot_dispatch_2::@interleavedDot_dispatch_2[%[[C4]], %[[C4]], %[[C1]]](%2, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+//  CHECK-NEXT:     flow.return %3 : tensor<4x4xf32>
+//  CHECK-NEXT:   }
+//  CHECK-NEXT:   return %0 : tensor<4x4xf32>
+//  CHECK-NEXT: }
 
 // -----
 
@@ -126,25 +85,18 @@
   return %1 : tensor<4xf32>
 }
 
-// CHECK-LABEL: flow.executable @reduction_ex_dispatch_0 attributes {sym_visibility = "private"} {
-//  CHECK-NEXT:   flow.dispatch.entry @reduction_ex_dispatch_0
+// CHECK-LABEL: flow.executable @reduction_dispatch_0 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT:   flow.dispatch.entry @reduction_dispatch_0
 //  CHECK-NEXT:   module {
-//  CHECK-NEXT:     func @reduction_ex_dispatch_0(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-//  CHECK-NEXT:       %cst = constant dense<0.000000e+00> : tensor<f32>
-//  CHECK-NEXT:       %0 = "mhlo.reduce"(%arg0, %cst) ( {
-//  CHECK-NEXT:       ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
-//  CHECK-NEXT:         %1 = mhlo.add %arg1, %arg2 : tensor<f32>
-//  CHECK-NEXT:         "mhlo.return"(%1) : (tensor<f32>) -> ()
-//  CHECK-NEXT:       }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-//  CHECK-NEXT:       return %0 : tensor<4xf32>
-//  CHECK-NEXT:     }
-//  CHECK-NEXT:   }
-//  CHECK-NEXT: }
-//  CHECK-NEXT: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
+//  CHECK-NEXT:     func @reduction_dispatch_0
+//       CHECK:       %{{.+}} = linalg.generic
+//       CHECK:         %{{.+}} = addf %{{.+}}, %{{.+}} : f32
+//       CHECK: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
 //  CHECK-NEXT:   %0 = flow.ex.stream.fragment(%arg0) : (tensor<4x8xf32>) -> tensor<4xf32> =
 //  CHECK-NEXT:       (%arg1: tensor<4x8xf32>) -> tensor<4xf32> {
-//  CHECK-NEXT:     %[[WORKLOAD:.+]] = constant 4 : index
-//  CHECK-NEXT:     %1 = flow.dispatch @reduction_ex_dispatch_0::@reduction_ex_dispatch_0[%[[WORKLOAD]]](%arg1) : (tensor<4x8xf32>) -> tensor<4xf32>
+//   CHECK-DAG:     %[[C1:.+]] = constant 1 : index
+//   CHECK-DAG:     %[[C4:.+]] = constant 4 : index
+//  CHECK-NEXT:     %1 = flow.dispatch @reduction_dispatch_0::@reduction_dispatch_0[%[[C4]], %[[C1]], %[[C1]]](%arg1) : (tensor<4x8xf32>) -> tensor<4xf32>
 //  CHECK-NEXT:     flow.return %1 : tensor<4xf32>
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   return %0 : tensor<4xf32>
@@ -158,27 +110,26 @@
   return %1 : tensor<2x4xi32>
 }
 
-// CHECK-LABEL: flow.executable @dynamicUpdateSlice_ex_dispatch_0 attributes {sym_visibility = "private"} {
-// CHECK-NEXT: flow.dispatch.entry @dynamicUpdateSlice_ex_dispatch_0
-// CHECK-NEXT:   module {
-// CHECK-NEXT:     func @dynamicUpdateSlice_ex_dispatch_0(%arg0: tensor<2x4xi32>, %arg1: tensor<2x4xi32>) -> tensor<2x4xi32> {
-// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg1 : tensor<2x4xi32>
-// CHECK-NEXT:       return %0 : tensor<2x4xi32>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
-//  CHECK-DAG:   %[[ARG2_LOAD:.+]] = flow.tensor.load %arg2 : tensor<i32>
-//  CHECK-DAG:   %[[ARG2_INDEX:.+]] = index_cast %[[ARG2_LOAD]] : i32 to index
-//  CHECK-DAG:   %[[ARG3_LOAD:.+]] = flow.tensor.load %arg3 : tensor<i32>
-//  CHECK-DAG:   %[[ARG3_INDEX:.+]] = index_cast %[[ARG3_LOAD]] : i32 to index
-// CHECK-NEXT:   %[[RET:.+]] = flow.ex.stream.fragment(%arg0, %[[ARG2_INDEX]], %[[ARG3_INDEX]], %arg1) : (tensor<2x4xi32>, index, index, tensor<1x1xi32>) -> tensor<2x4xi32> =
-// CHECK-NEXT:        (%arg4: tensor<2x4xi32>, %arg5: index, %arg6: index, %arg7: tensor<1x1xi32>) -> tensor<2x4xi32> {
-// CHECK-NEXT:     %[[WORKLOAD:.+]] = constant 8 : index
-// CHECK-NEXT:     %[[ARG4_CLONE:.+]] = flow.tensor.clone %arg4 : tensor<2x4xi32>
-// CHECK-NEXT:     %[[T0:.+]] = flow.tensor.update %arg7, %arg4[%arg5, %arg6] : tensor<1x1xi32> -> tensor<2x4xi32>
-// CHECK-NEXT:     %[[T1:.+]] = flow.dispatch @dynamicUpdateSlice_ex_dispatch_0::@dynamicUpdateSlice_ex_dispatch_0[%[[WORKLOAD]]](%[[ARG4_CLONE]], %[[T0]]) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32>
-// CHECK-NEXT:     flow.return %[[T1]] : tensor<2x4xi32>
-// CHECK-NEXT:   }
-// CHECK-NEXT:   return %[[RET]] : tensor<2x4xi32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @dynamicUpdateSlice_dispatch_0 attributes {sym_visibility = "private"} {
+//  CHECK-NEXT: flow.dispatch.entry @dynamicUpdateSlice_dispatch_0
+//  CHECK-NEXT:   module {
+//  CHECK-NEXT:     func @dynamicUpdateSlice_dispatch_0
+//       CHECK:       %{{.+}} = linalg.generic
+//       CHECK:         %{{.+}} = addi %{{.+}}, %{{.+}} : i32
+//       CHECK: func @dynamicUpdateSlice(%arg0: tensor<2x4xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<2x4xi32> {
+//   CHECK-DAG:   %[[ARG2_LOAD:.+]] = flow.tensor.load %arg2 : tensor<i32>
+//   CHECK-DAG:   %[[ARG2_INDEX:.+]] = index_cast %[[ARG2_LOAD]] : i32 to index
+//   CHECK-DAG:   %[[ARG3_LOAD:.+]] = flow.tensor.load %arg3 : tensor<i32>
+//   CHECK-DAG:   %[[ARG3_INDEX:.+]] = index_cast %[[ARG3_LOAD]] : i32 to index
+//  CHECK-NEXT:   %[[RET:.+]] = flow.ex.stream.fragment(%arg0, %[[ARG2_INDEX]], %[[ARG3_INDEX]], %arg1) : (tensor<2x4xi32>, index, index, tensor<1x1xi32>) -> tensor<2x4xi32> =
+//  CHECK-NEXT:        (%arg4: tensor<2x4xi32>, %arg5: index, %arg6: index, %arg7: tensor<1x1xi32>) -> tensor<2x4xi32> {
+//   CHECK-DAG:     %[[C1:.+]] = constant 1 : index
+//   CHECK-DAG:     %[[C2:.+]] = constant 2 : index
+//   CHECK-DAG:     %[[C4:.+]] = constant 4 : index
+//  CHECK-NEXT:     %[[ARG4_CLONE:.+]] = flow.tensor.clone %arg4 : tensor<2x4xi32>
+//  CHECK-NEXT:     %[[T0:.+]] = flow.tensor.update %arg7, %arg4[%arg5, %arg6] : tensor<1x1xi32> -> tensor<2x4xi32>
+//  CHECK-NEXT:     %[[T1:.+]] = flow.dispatch @dynamicUpdateSlice_dispatch_0::@dynamicUpdateSlice_dispatch_0[%[[C4]], %[[C2]], %[[C1]]](%[[ARG4_CLONE]], %[[T0]]) : (tensor<2x4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32>
+//  CHECK-NEXT:     flow.return %[[T1]] : tensor<2x4xi32>
+//  CHECK-NEXT:   }
+//  CHECK-NEXT:   return %[[RET]] : tensor<2x4xi32>
+//  CHECK-NEXT: }
diff --git a/iree/compiler/Dialect/Flow/Utils/BUILD b/iree/compiler/Dialect/Flow/Utils/BUILD
index a15ec70..a8260f8 100644
--- a/iree/compiler/Dialect/Flow/Utils/BUILD
+++ b/iree/compiler/Dialect/Flow/Utils/BUILD
@@ -21,22 +21,16 @@
 cc_library(
     name = "Utils",
     srcs = [
-        "DispatchUtils.cpp",
         "WorkloadUtils.cpp",
     ],
     hdrs = [
-        "DispatchUtils.h",
         "WorkloadUtils.h",
     ],
     deps = [
-        "//iree/compiler/Dialect/Flow/IR",
         "//iree/compiler/Dialect/Shape/IR",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:LinalgOps",
         "@llvm-project//mlir:StandardOps",
         "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TosaDialect",
-        "@mlir-hlo//:hlo",
     ],
 )
diff --git a/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt b/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
index 47c410c..28b7f70 100644
--- a/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Utils/CMakeLists.txt
@@ -14,21 +14,15 @@
   NAME
     Utils
   HDRS
-    "DispatchUtils.h"
     "WorkloadUtils.h"
   SRCS
-    "DispatchUtils.cpp"
     "WorkloadUtils.cpp"
   DEPS
     LLVMSupport
     MLIRIR
-    MLIRLinalg
     MLIRStandard
     MLIRSupport
-    MLIRTosa
-    iree::compiler::Dialect::Flow::IR
     iree::compiler::Dialect::Shape::IR
-    tensorflow::mlir_hlo
   PUBLIC
 )
 
diff --git a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
deleted file mode 100644
index 8c13cdb..0000000
--- a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeDialect.h"
-#include "llvm/ADT/SetVector.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-bool isOpOfKnownDialect(Operation *op) {
-  if (!op->getDialect()) return false;
-  // TODO(benvanik): replace with op dispatchability interface to allow dialects
-  // to opt into dispatch.
-  auto dialectNamespace = op->getDialect()->getNamespace();
-  return dialectNamespace == FlowDialect::getDialectNamespace() ||
-         dialectNamespace == linalg::LinalgDialect::getDialectNamespace() ||
-         dialectNamespace == mhlo::MhloDialect::getDialectNamespace() ||
-         dialectNamespace == mlir::StandardOpsDialect::getDialectNamespace() ||
-         dialectNamespace == ShapeDialect::getDialectNamespace() ||
-         dialectNamespace == tosa::TosaDialect::getDialectNamespace();
-}
-
-namespace {
-
-// Returns the set of values that must be captured for use by |ops| and the
-// set of values defined by |ops| that are used outside of the set.
-LogicalResult analyzeOpRangeValues(ArrayRef<Operation *> ops,
-                                   llvm::SetVector<Value> *capturedValues,
-                                   llvm::SetVector<Value> *escapingValues) {
-  llvm::SmallDenseSet<Operation *> opSet;
-  opSet.reserve(ops.size());
-  opSet.insert(ops.begin(), ops.end());
-  for (auto *op : ops) {
-    for (auto value : op->getOperands()) {
-      if (!llvm::is_contained(opSet, value.getDefiningOp())) {
-        // Op is using a value not in the ops set, ensure we capture it.
-        capturedValues->insert(value);
-      }
-    }
-    for (auto value : op->getResults()) {
-      for (auto &use : value.getUses()) {
-        if (!llvm::is_contained(opSet, use.getOwner())) {
-          // An op outside of the ops set is using the value, needs to escape.
-          escapingValues->insert(value);
-          continue;
-        }
-      }
-    }
-  }
-  return success();
-}
-
-}  // namespace
-
-LogicalResult buildDispatchRegion(Block *parentBlock, Value workload,
-                                  ArrayRef<Operation *> ops) {
-  // Fused location with all ops.
-  SmallVector<Location, 16> opLocs;
-  for (auto *op : ops) {
-    opLocs.push_back(op->getLoc());
-  }
-  auto regionLoc = FusedLoc::get(workload.getContext(), opLocs);
-
-  // Get a list of values that we need to capture and values that escape the
-  // region and need to be returned.
-  llvm::SetVector<Value> capturedValues;
-  llvm::SetVector<Value> escapingValues;
-  if (failed(analyzeOpRangeValues(ops, &capturedValues, &escapingValues))) {
-    return failure();
-  }
-  SmallVector<Type, 8> escapingTypes;
-  for (auto value : escapingValues) escapingTypes.push_back(value.getType());
-
-  // Build the region op and add it to the parent block.
-  OpBuilder parentBuilder = OpBuilder::atBlockEnd(parentBlock);
-  parentBuilder.setInsertionPoint(ops.back());
-  auto dispatchRegionOp = parentBuilder.create<IREE::Flow::DispatchRegionOp>(
-      regionLoc, escapingTypes, workload, capturedValues.getArrayRef());
-
-  // Create the block and setup the arg mapping for captured values.
-  auto *regionBlock = new Block();
-  dispatchRegionOp.body().push_back(regionBlock);
-  OpBuilder regionBuilder = OpBuilder::atBlockEnd(regionBlock);
-  BlockAndValueMapping mapping;
-  for (auto capturedValue : capturedValues) {
-    auto blockArg = regionBlock->addArgument(capturedValue.getType());
-    mapping.map(capturedValue, blockArg);
-  }
-
-  // Clone ops into the new region block.
-  for (auto *op : ops) {
-    // Note that this updates the mapping with the new values (so at the end
-    // we have those new values).
-    regionBuilder.clone(*op, mapping);
-  }
-
-  // Return results (as we need a terminator in our block).
-  // These are all of the values that escape our region.
-  SmallVector<Value, 8> resultValues;
-  for (auto oldValue : escapingValues) {
-    resultValues.push_back(mapping.lookupOrDefault(oldValue));
-  }
-  regionBuilder.create<IREE::Flow::ReturnOp>(opLocs.back(), resultValues);
-
-  // Replace usage of values with the results of the region.
-  for (int i = 0; i < escapingValues.size(); ++i) {
-    escapingValues[i].replaceAllUsesWith(dispatchRegionOp.getResult(i));
-  }
-
-  // Remove original ops from the parent region.
-  for (auto it = ops.rbegin(); it != ops.rend(); ++it) {
-    (*it)->erase();
-  }
-
-  return success();
-}
-
-namespace {
-
-// Recursively finds all reachable functions from the given |rootFunc| and adds
-// them to the |reachableFuncs| set.
-//
-// Note that indirect calls are not supported, however we don't allow those in
-// dispatch regions anyway so they should not be present here.
-LogicalResult findReachableFunctions(
-    FuncOp rootFuncOp, llvm::SetVector<FuncOp> &reachableFuncs,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  llvm::SetVector<FuncOp> worklist;
-  worklist.insert(rootFuncOp);
-  while (!worklist.empty()) {
-    auto funcOp = worklist.pop_back_val();
-    funcOp.walk([&](CallOp callOp) {
-      auto calleeOp = dispatchableFuncOps.find(callOp.callee())->second;
-      if (reachableFuncs.insert(calleeOp)) {
-        worklist.insert(calleeOp);
-      }
-    });
-  }
-  return success();
-}
-
-}  // namespace
-
-ExecutableOp createExecutable(Location loc, StringRef executableName,
-                              ArrayRef<FuncOp> funcOps, ModuleOp parentModuleOp,
-                              llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  assert(!funcOps.empty() && "must have at least one entry function");
-
-  // Gather all reachable functions.
-  llvm::SetVector<FuncOp> reachableFuncs;
-  for (auto funcOp : funcOps) {
-    (void)findReachableFunctions(funcOp, reachableFuncs, dispatchableFuncOps);
-  }
-
-  // Create the executable that will contain the outlined region.
-  // NOTE: this will get uniquified if we have multiple in the same block.
-  OpBuilder parentModuleBuilder(&parentModuleOp.getBody()->back());
-  auto executableOp =
-      parentModuleBuilder.create<IREE::Flow::ExecutableOp>(loc, executableName);
-
-  // Create the inner ModuleOp that contains the original functions. We need
-  // to provide this shim as some ops (like std.call) look for the
-  // containing module to provide symbol resolution.
-  OpBuilder executableBuilder(executableOp);
-  executableBuilder.setInsertionPointToStart(&executableOp.getBlock());
-  auto innerModule = executableBuilder.create<ModuleOp>(loc);
-  for (auto funcOp : funcOps) {
-    innerModule.push_back(funcOp);
-  }
-
-  // Copy all reachable functions into the executable.
-  // Linker passes may dedupe these later on.
-  OpBuilder innerModuleBuilder = OpBuilder::atBlockEnd(innerModule.getBody());
-  innerModuleBuilder.setInsertionPoint(innerModule.getBody(),
-                                       ++innerModule.getBody()->begin());
-  for (auto reachableFunc : reachableFuncs) {
-    innerModuleBuilder.clone(*reachableFunc);
-  }
-
-  return executableOp;
-}
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h
deleted file mode 100644
index 3a53416..0000000
--- a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Utilities for dispatch region and function manipulation.
-// These are shared between all dispatchable types such as the standard
-// dispatch region as well as dispatch-related types like reduction region.
-
-#ifndef IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
-#define IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "mlir/IR/Block.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/IR/Value.h"
-#include "mlir/Support/LogicalResult.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-// Returns true if we know about this dialect and have special dispatchability
-// information about it.
-bool isOpOfKnownDialect(Operation *op);
-
-// Builds a new dispatch region with the given |ops|.
-// The region will capture all required values and return all values used
-// outside of the |ops| provided. The region will be inserted at the location of
-// the last operation in the set.
-//
-// All |ops| must be compatible with the |workload| specified as they will all
-// be dispatched with the same workgroup structure.
-// TODO(benvanik): ensure we want to insert at end. Maybe front?
-LogicalResult buildDispatchRegion(Block *parentBlock, Value workload,
-                                  ArrayRef<Operation *> ops);
-
-// Creates a flow.executable out of a set of functions, pulling in all other
-// functions reachable by the provided functions.
-ExecutableOp createExecutable(Location loc, StringRef executableName,
-                              ArrayRef<FuncOp> funcOps, ModuleOp parentModuleOp,
-                              llvm::StringMap<FuncOp> &dispatchableFuncOps);
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
-
-#endif  // IREE_COMPILER_DIALECT_FLOW_UTILS_DISPATCHUTILS_H_
diff --git a/iree/compiler/Dialect/HAL/Transforms/BUILD b/iree/compiler/Dialect/HAL/Transforms/BUILD
index 43832e6..99247a5 100644
--- a/iree/compiler/Dialect/HAL/Transforms/BUILD
+++ b/iree/compiler/Dialect/HAL/Transforms/BUILD
@@ -28,7 +28,6 @@
         "InlineDeviceSwitches.cpp",
         "LinkExecutables.cpp",
         "MaterializeConstantPoolBuffers.cpp",
-        "MaterializeInterfaces.cpp",
         "MaterializeInterfaces2.cpp",
         "MaterializeResourceCaches.cpp",
         "MemoizeDeviceQueries.cpp",
diff --git a/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt b/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
index 86850b3..975f903 100644
--- a/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt
@@ -23,7 +23,6 @@
     "InlineDeviceSwitches.cpp"
     "LinkExecutables.cpp"
     "MaterializeConstantPoolBuffers.cpp"
-    "MaterializeInterfaces.cpp"
     "MaterializeInterfaces2.cpp"
     "MaterializeResourceCaches.cpp"
     "MemoizeDeviceQueries.cpp"
diff --git a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
deleted file mode 100644
index 57f5a8e..0000000
--- a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <memory>
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/HAL/Target/TargetBackend.h"
-#include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
-#include "iree/compiler/Dialect/HAL/Transforms/Passes.h"
-#include "iree/compiler/Dialect/HAL/Utils/TypeUtils.h"
-#include "llvm/ADT/StringSet.h"
-#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace HAL {
-
-// Adds IO ops (such as hal.io.binding) and updates function signatures to use
-// them for their IO. We do this in a target-independent manner today so that we
-// can share the same descriptor set logic and parameter population code on the
-// scheduling side. In the future we could allow backends to opt into different
-// behavior.
-static llvm::Optional<IREE::HAL::InterfaceOp> declareInterfaceIO(
-    IREE::Flow::ExecutableOp sourceOp, IREE::HAL::ExecutableOp targetOp) {
-  auto moduleOp = sourceOp.getInnerModule();
-  OpBuilder executableBuilder(targetOp.getContext());
-  executableBuilder.setInsertionPointToStart(&targetOp.getBlock());
-
-  // NOTE: we assume right now that all entry points have the same signature.
-  SmallVector<FuncOp, 1> entryFuncOps;
-  SmallVector<Location, 1> entryLocs;
-  for (auto &op : sourceOp.getBlock()) {
-    if (auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
-      auto funcOp =
-          moduleOp.lookupSymbol<FuncOp>(dispatchEntryOp.function_ref());
-      entryFuncOps.push_back(funcOp);
-      entryLocs.push_back(dispatchEntryOp.getLoc());
-    }
-  }
-  auto interfaceLoc = executableBuilder.getFusedLoc(entryLocs);
-  auto interfaceOp = executableBuilder.create<IREE::HAL::InterfaceOp>(
-      interfaceLoc, "legacy_io");
-  OpBuilder interfaceBuilder(interfaceOp);
-  interfaceBuilder.setInsertionPointToStart(&interfaceOp.getBlock());
-
-  // Add one binding per argument and result. This matches the legacy interface
-  // and allows us to keep using the current binding setup on the scheduler
-  // side.
-  // NOTE: we assume right now that all entry points have the same signature.
-  // TODO(benvanik): replace when we have descriptor sets in the HAL IR.
-  auto anyFuncOp = entryFuncOps.front();
-  int nextBindingOrdinal = 0;
-  int pushConstantCount = 0;
-  for (auto inputType : llvm::enumerate(anyFuncOp.getType().getInputs())) {
-    if (inputType.value().isa<TensorType>()) {
-      int bindingOrdinal = nextBindingOrdinal++;
-      auto bindingName = "arg" + std::to_string(inputType.index());
-      interfaceBuilder.create<IREE::HAL::InterfaceBindingOp>(
-          interfaceLoc, bindingName, /*set=*/APInt(64, 0),
-          /*binding=*/APInt(64, bindingOrdinal),
-          IREE::HAL::DescriptorType::StorageBuffer,
-          IREE::HAL::MemoryAccessBitfield::Read);
-    } else if (auto indexType = inputType.value().dyn_cast<IndexType>()) {
-      ++pushConstantCount;
-    } else if (auto integerType = inputType.value().dyn_cast<IntegerType>()) {
-      if (integerType.getIntOrFloatBitWidth() != 32) {
-        emitError(interfaceLoc)
-            << "unsupported argument " << inputType.index() << " bit depth "
-            << integerType.getIntOrFloatBitWidth() << " (" << integerType
-            << "); only 32-bit values are supported right now";
-        return llvm::None;
-      }
-      ++pushConstantCount;
-    } else {
-      emitError(interfaceLoc)
-          << "unsupported interface function argument " << inputType.index()
-          << " type " << inputType.value()
-          << "; requires tensors or simple primitive values (i32, etc)";
-      return llvm::None;
-    }
-  }
-  for (auto outputType : llvm::enumerate(anyFuncOp.getType().getResults())) {
-    int bindingOrdinal = nextBindingOrdinal++;
-    auto bindingName = "ret" + std::to_string(outputType.index());
-    if (outputType.value().isa<TensorType>()) {
-      interfaceBuilder.create<IREE::HAL::InterfaceBindingOp>(
-          interfaceLoc, bindingName, /*set=*/APInt(64, 0),
-          /*binding=*/APInt(64, bindingOrdinal),
-          IREE::HAL::DescriptorType::StorageBuffer,
-          IREE::HAL::MemoryAccessBitfield::DiscardWrite);
-    } else {
-      emitError(interfaceLoc)
-          << "unsupported result " << outputType.index() << " type "
-          << outputType.value() << "; requires tensor types";
-      return llvm::None;
-    }
-  }
-
-  if (pushConstantCount > 0) {
-    interfaceOp->setAttr("push_constants",
-                         interfaceBuilder.getIndexAttr(pushConstantCount));
-  }
-
-  return interfaceOp;
-}
-
-// Converts a value to/from one supported by the ABI from/to an arbitrary tensor
-// type.
-//
-// Ideally we'd use some type-aware conversion to handle signed/unsigned
-// saturation vs. truncation. As an example, we'd want to zero-extend an
-// unsigned i4 to a signed i8. We also don't want to use HLO ops here, but the
-// standard ops (trunci, zexti, etc) are not supported by subsequent lowerings
-// and just cause pain.
-//
-// Example: `tensor<4xi8>` -> `tensor<4xi1>`
-//      or  `tensor<4xi1>` -> `tensor<4xi8>`
-static Value convertABITensorType(Location loc, Value sourceValue,
-                                  TensorType targetType, OpBuilder &builder) {
-  auto sourceType = sourceValue.getType().cast<TensorType>();
-  if (sourceType == targetType) {
-    return sourceValue;
-  }
-  // TODO(benvanik): use a type converter or a dialect interface.
-  return builder.createOrFold<mhlo::ConvertOp>(loc, sourceValue,
-                                               targetType.getElementType());
-}
-
-// Creates a new entry function that uses the hal.interface bindings to marshal
-// IO to the original entry function.
-// Invariants:
-//   - The thunk function generates loads for entries in the InterfaceOp
-//     based on category:
-//       1. Push constants
-//       2. Bindings
-//     Within a category, the order follows the order within the interface.
-//     Such an ordering can be useful for downstream code generation because
-//     it can often be necessary to reference primitives in the materialization
-//     of binding-based loads (i.e. for size calculations, etc). For any
-//     stronger guarantees or inter-load ordering constraints, downstream
-//     code generation must explicitly take non-determinism of argument
-//     ordering into account.
-static Optional<FuncOp> createDispatchEntryThunk(
-    FuncOp sourceFuncOp, IREE::HAL::InterfaceOp interfaceOp,
-    IREE::HAL::ExecutableTargetOp targetOp) {
-  // Clone the source FuncOp into the target then manipulate it into a
-  // dispatch entry thunk.
-  auto clonedFuncOp = sourceFuncOp.clone();
-  targetOp.getInnerModule().push_back(clonedFuncOp);
-
-  // Functions take all I/O through the interface API.
-  auto sourceFuncType = clonedFuncOp.getType();
-  auto thunkFuncType = FunctionType::get(clonedFuncOp.getContext(), {}, {});
-  auto thunkFuncOp = FuncOp::create(clonedFuncOp.getLoc(),
-                                    clonedFuncOp.getName(), thunkFuncType);
-  clonedFuncOp.setName((clonedFuncOp.getName() + "_impl").str());
-  clonedFuncOp.setPrivate();
-  clonedFuncOp->getParentRegion()->getBlocks().front().push_front(thunkFuncOp);
-
-  // For now we only support tensor types, so bindings are in order.
-  // In the future we will want to provide N:M mappings (as well as the
-  // information to compute offsets).
-  int binding = 0;
-  auto bindingOps = llvm::to_vector<4>(
-      interfaceOp.getBlock().getOps<IREE::HAL::InterfaceBindingOp>());
-
-  // Pull all arguments from the bindings.
-  auto *thunkEntryBlock = thunkFuncOp.addEntryBlock();
-  OpBuilder thunkEntryBuilder = OpBuilder::atBlockEnd(thunkEntryBlock);
-  Operation *firstNonConstOp = nullptr;
-  auto positionForNonConst = [&]() {
-    thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
-  };
-  auto positionForConst = [&]() {
-    if (firstNonConstOp) {
-      thunkEntryBuilder.setInsertionPoint(firstNonConstOp);
-    } else {
-      thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
-    }
-  };
-
-  // Create load ops, first for push constants with binding based loads after.
-  auto zeroOffset = thunkEntryBuilder.createOrFold<mlir::ConstantIndexOp>(
-      thunkFuncOp.getLoc(), 0);
-  SmallVector<Value, 4> operands;
-  int pushConstantOffset = 0;
-  for (auto inputType : sourceFuncType.getInputs()) {
-    if (auto sourceType = inputType.dyn_cast<TensorType>()) {
-      positionForNonConst();
-      auto bindingOp = bindingOps[binding++];
-      auto targetType = convertTensorTypeToABIType(sourceType);
-      auto loadOp = thunkEntryBuilder.create<IREE::HAL::InterfaceLoadTensorOp>(
-          thunkFuncOp.getLoc(), targetType,
-          thunkEntryBuilder.getSymbolRefAttr(
-              interfaceOp.sym_name(),
-              {thunkEntryBuilder.getSymbolRefAttr(bindingOp)}),
-          zeroOffset);
-      Value abiValue =
-          convertABITensorType(thunkFuncOp.getLoc(), loadOp.getResult(),
-                               sourceType, thunkEntryBuilder);
-      if (!abiValue) {
-        clonedFuncOp.emitError()
-            << "function argument type " << inputType
-            << " cannot be converted to a HAL ABI type " << targetType;
-        return llvm::None;
-      }
-      operands.push_back(abiValue);
-      firstNonConstOp = loadOp;
-    } else if (inputType.isa<IndexType>() || inputType.isa<IntegerType>()) {
-      positionForConst();
-      auto loadOp =
-          thunkEntryBuilder.create<IREE::HAL::InterfaceLoadConstantOp>(
-              thunkFuncOp.getLoc(), inputType, APInt(64, pushConstantOffset));
-      operands.push_back(loadOp.getResult());
-      ++pushConstantOffset;
-    } else {
-      clonedFuncOp.emitError() << "function argument type " << inputType
-                               << " is not valid for interface I/O";
-      return llvm::None;
-    }
-  }
-  thunkEntryBuilder.setInsertionPointToEnd(thunkEntryBlock);
-
-  // Call the original entry function.
-  auto callOp = thunkEntryBuilder.create<mlir::CallOp>(thunkFuncOp.getLoc(),
-                                                       clonedFuncOp, operands);
-
-  // Push all results to the bindings.
-  for (auto resultTypeValue :
-       llvm::zip(sourceFuncType.getResults(), callOp.getResults())) {
-    auto sourceType = std::get<0>(resultTypeValue).cast<TensorType>();
-    auto targetType = convertTensorTypeToABIType(sourceType);
-    Value resultValue = std::get<1>(resultTypeValue);
-    Value abiValue = convertABITensorType(thunkFuncOp.getLoc(), resultValue,
-                                          targetType, thunkEntryBuilder);
-    if (!abiValue) {
-      clonedFuncOp.emitError()
-          << "function result type " << resultValue.getType()
-          << " cannot be converted from HAL ABI type " << targetType;
-      return llvm::None;
-    }
-    auto bindingOp = bindingOps[binding++];
-    thunkEntryBuilder.create<IREE::HAL::InterfaceStoreTensorOp>(
-        thunkFuncOp.getLoc(), abiValue,
-        thunkEntryBuilder.getSymbolRefAttr(
-            interfaceOp.sym_name(),
-            {thunkEntryBuilder.getSymbolRefAttr(bindingOp)}),
-        zeroOffset);
-  }
-  thunkEntryBuilder.create<mlir::ReturnOp>(thunkFuncOp.getLoc());
-
-  return thunkFuncOp;
-}
-
-// Adds the entry point ops with assigned ordinals for each entry function.
-// The entry points will all use the provided |interfaceOp|.
-static LogicalResult declareEntryPointOps(
-    IREE::Flow::ExecutableOp sourceExecutableOp,
-    IREE::HAL::ExecutableOp targetExecutableOp,
-    IREE::HAL::InterfaceOp interfaceOp) {
-  auto targetOps =
-      targetExecutableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>();
-  for (auto targetOp : targetOps) {
-    OpBuilder builder(&targetOp.getBlock().front());
-
-    // For each Flow entry point, create a HAL entry point and dispatch thunk.
-    int nextOrdinal = 0;
-    for (auto &op : sourceExecutableOp.getBlock()) {
-      if (auto dispatchEntryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
-        auto sourceFuncOp =
-            sourceExecutableOp.getInnerModule().lookupSymbol<FuncOp>(
-                dispatchEntryOp.function_ref());
-
-        auto thunkFuncOp =
-            createDispatchEntryThunk(sourceFuncOp, interfaceOp, targetOp);
-        if (!thunkFuncOp.hasValue()) {
-          return failure();
-        }
-        dispatchEntryOp->setAttr(
-            "function_ref", builder.getSymbolRefAttr(thunkFuncOp.getValue()));
-
-        builder.create<IREE::HAL::ExecutableEntryPointOp>(
-            dispatchEntryOp.getLoc(),
-            builder.getStringAttr(dispatchEntryOp.function_ref()),
-            builder.getIndexAttr(nextOrdinal++),
-            builder.getSymbolRefAttr(interfaceOp),
-            TypeAttr::get(sourceFuncOp.getType()), ArrayAttr{});
-      }
-    }
-
-    // Copy interface bindings into the target module so symbol references work.
-    auto inlinedInterfaceOp = interfaceOp.clone();
-    inlinedInterfaceOp.setPrivate();
-    targetOp.getInnerModule().push_back(inlinedInterfaceOp);
-  }
-  return success();
-}
-
-// Creates zero or more hal.executable.target ops for each target backend.
-// The source op will contain the flow.executable contents and any attributes
-// the backend wants to carry along during transformation.
-static LogicalResult declareTargetOps(TargetOptions targetOptions,
-                                      IREE::Flow::ExecutableOp sourceOp,
-                                      IREE::HAL::ExecutableOp executableOp) {
-  // The user has specified what targets they want as a set of patterns. This
-  // matches against those patterns so vulkan-* may match vulkan-v1.1 and
-  // vulkan-v1.2.
-  auto targetBackends = matchTargetBackends(targetOptions.targets);
-  if (targetBackends.empty()) {
-    auto diagnostic = sourceOp.emitError();
-    diagnostic
-        << "no target backends available for executable translation; ensure "
-        << "they are linked in and the target options are properly "
-        << "specified. requested = [ ";
-    for (const auto &target : targetOptions.targets) {
-      diagnostic << "'" << target << "' ";
-    }
-    diagnostic << "], available = [ ";
-    for (const auto &target : getRegisteredTargetBackends()) {
-      diagnostic << "'" << target << "' ";
-    }
-    diagnostic << "]";
-    return diagnostic;
-  }
-
-  // Materialize all of the hal.executable.target ops for all backends we are
-  // targeting. Note that each backend may create zero or more target ops.
-  for (auto &targetBackend : targetBackends) {
-    targetBackend->declareTargetOps(sourceOp, executableOp);
-  }
-
-  // Ensure that at least one target op got created. If it didn't that means
-  // the executable cannot be translated and it's better to fail now.
-  if (executableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>().empty()) {
-    auto diagnostic = sourceOp.emitError();
-    diagnostic
-        << "no target backend was able to handle this executable; tried = [ ";
-    for (const auto &target : targetOptions.targets) {
-      diagnostic << "'" << target << "' ";
-    }
-    diagnostic << "]";
-    return diagnostic;
-  }
-
-  return success();
-}
-
-class MaterializeInterfacesPass
-    : public PassWrapper<MaterializeInterfacesPass, OperationPass<ModuleOp>> {
- public:
-  explicit MaterializeInterfacesPass(TargetOptions targetOptions)
-      : targetOptions_(targetOptions) {}
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::HAL::HALDialect>();
-
-    auto targetBackends = matchTargetBackends(targetOptions_.targets);
-    for (auto &targetBackend : targetBackends) {
-      targetBackend->getDependentDialects(registry);
-    }
-  }
-
-  void runOnOperation() override {
-    // Processes all executables within the input module and produces the output
-    // HAL ops. We should ensure all deduping is performed prior to this when
-    // it's easier to diff IR and where we still have the flow context.
-    auto sourceOps =
-        llvm::to_vector<32>(getOperation().getOps<IREE::Flow::ExecutableOp>());
-    for (auto sourceOp : sourceOps) {
-      // Don't touch tiled executables as the new pass takes care of that.
-      auto entryOps = sourceOp.getOps<IREE::Flow::DispatchEntryOp>();
-      if (entryOps.empty()) continue;
-      auto anyEntryOp = *entryOps.begin();
-      if (anyEntryOp.workgroup_rank().hasValue()) {
-        continue;
-      }
-
-      // Create the op that will contain the translated executables.
-      OpBuilder builder = OpBuilder::atBlockEnd(getOperation().getBody());
-      builder.setInsertionPointAfter(sourceOp);
-      auto executableOp = builder.create<IREE::HAL::ExecutableOp>(
-          sourceOp.getLoc(), sourceOp.getName());
-      executableOp.setVisibility(sourceOp.getVisibility());
-
-      // Add IO ops to define the bindings and how parameters are passed.
-      auto interfaceOp = declareInterfaceIO(sourceOp, executableOp);
-      if (!interfaceOp.hasValue()) {
-        return signalPassFailure();
-      }
-
-      // Embed the hal.executable.target ops for each source.
-      if (failed(declareTargetOps(targetOptions_, sourceOp, executableOp))) {
-        return signalPassFailure();
-      }
-
-      // Annotate the entry points.
-      // TODO(benvanik): allow entry points to use different interfaces.
-      if (failed(declareEntryPointOps(sourceOp, executableOp,
-                                      interfaceOp.getValue()))) {
-        return signalPassFailure();
-      }
-
-      sourceOp.erase();
-    }
-  }
-
- private:
-  TargetOptions targetOptions_;
-};
-
-std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfacesPass(
-    TargetOptions targetOptions) {
-  return std::make_unique<MaterializeInterfacesPass>(targetOptions);
-}
-
-static PassRegistration<MaterializeInterfacesPass> pass(
-    "iree-hal-materialize-interfaces",
-    "Materializes hal.executable ops from flow.executable ops (DEPRECATED)",
-    [] {
-      auto options = getTargetOptionsFromFlags();
-      return std::make_unique<MaterializeInterfacesPass>(options);
-    });
-
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
index bc14850..ed3d498 100644
--- a/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
@@ -66,7 +66,6 @@
   // Each executable needs a hal.interface to specify how the host and device
   // communicate across the ABI boundary.
   passManager.addPass(createMaterializeInterfaces2Pass(targetOptions));
-  passManager.addPass(createMaterializeInterfacesPass(targetOptions));
 
   passManager.nest<ExecutableOp>().addNestedPass<ExecutableTargetOp>(
       createPropagateConstantWorkgroupInfoPass());
diff --git a/iree/compiler/Dialect/HAL/Transforms/Passes.h b/iree/compiler/Dialect/HAL/Transforms/Passes.h
index ee1200c..589e321 100644
--- a/iree/compiler/Dialect/HAL/Transforms/Passes.h
+++ b/iree/compiler/Dialect/HAL/Transforms/Passes.h
@@ -72,8 +72,6 @@
 // Defines hal.executables and hal.interfaces for flow.executable ops based on
 // usage within the module. Target backends are queried to check for support and
 // device placements are made.
-std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfacesPass(
-    TargetOptions targetOptions);
 std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfaces2Pass(
     TargetOptions targetOptions);
 
@@ -152,7 +150,6 @@
   createBenchmarkBatchDispatchesPass(/*repeatCount=*/1);
   createInlineDeviceSwitchesPass();
   createMemoizeDeviceQueriesPass();
-  createMaterializeInterfacesPass(targetOptions);
   createTranslateExecutablesPass(targetOptions);
   createLinkExecutablesPass(targetOptions);
   createResolveEntryPointOrdinalsPass();
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/BUILD b/iree/compiler/Dialect/HAL/Transforms/test/BUILD
index b926056..05e537f 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/BUILD
+++ b/iree/compiler/Dialect/HAL/Transforms/test/BUILD
@@ -30,7 +30,6 @@
             "identify_constant_pools.mlir",
             "inline_device_switches.mlir",
             "materialize_constant_pool_buffers.mlir",
-            "materialize_interfaces.mlir",
             "materialize_interfaces2.mlir",
             "materialize_resource_caches.mlir",
             "memoize_device_queries.mlir",
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt b/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
index 8515a64..925a3de 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt
@@ -19,7 +19,6 @@
     "identify_constant_pools.mlir"
     "inline_device_switches.mlir"
     "materialize_constant_pool_buffers.mlir"
-    "materialize_interfaces.mlir"
     "materialize_interfaces2.mlir"
     "materialize_resource_caches.mlir"
     "memoize_device_queries.mlir"
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
deleted file mode 100644
index c6ac12e..0000000
--- a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ /dev/null
@@ -1,109 +0,0 @@
-// RUN: iree-opt -allow-unregistered-dialect -split-input-file -iree-hal-materialize-interfaces -iree-hal-target-backends=vmvx %s | IreeFileCheck %s
-
-// CHECK-LABEL: hal.executable @simpleMath_ex_dispatch_0
-//   CHECK-DAG: hal.interface @legacy_io {
-//  CHECK-NEXT:   hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-//  CHECK-NEXT:   hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
-//  CHECK-NEXT: }
-//   CHECK-DAG: hal.executable.target @vmvx, filter="vmvx" {
-//   CHECK-DAG:   hal.executable.entry_point @simpleMath_rgn_dispatch_0 attributes {
-//  CHECK-SAME:     interface = @legacy_io,
-//  CHECK-SAME:     ordinal = 0 : index,
-//  CHECK-SAME:     signature = (tensor<4xf32>) -> tensor<4xf32>
-//  CHECK-SAME:   }
-flow.executable @simpleMath_ex_dispatch_0 {
-  flow.dispatch.entry @simpleMath_rgn_dispatch_0 attributes {
-    workload = 4 : index
-  }
-  // CHECK: module {
-  module {
-    // CHECK-NEXT: func @simpleMath_rgn_dispatch_0()
-    // CHECK-NEXT:   %[[ZERO:.+]] = constant 0 : index
-    // CHECK-NEXT:   %[[ARG0:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<4xf32>
-    // CHECK-NEXT:   %[[RET0:.+]] = call @simpleMath_rgn_dispatch_0_impl(%[[ARG0]]) : (tensor<4xf32>) -> tensor<4xf32>
-    // CHECK-NEXT:   hal.interface.store.tensor %[[RET0]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<4xf32>
-    // CHECK-NEXT:   return
-    // CHECK-NEXT: }
-    // CHECK-NEXT: func private @simpleMath_rgn_dispatch_0_impl
-    func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
-      return %0 : tensor<4xf32>
-    }
-    // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
-  }
-}
-
-// -----
-
-// CHECK-LABEL: hal.executable @bools_ex_dispatch_0
-//   CHECK-DAG: hal.interface @legacy_io {
-//  CHECK-NEXT:   hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-//  CHECK-NEXT:   hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-//  CHECK-NEXT:   hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-//  CHECK-NEXT: }
-//   CHECK-DAG: hal.executable.target @vmvx, filter="vmvx" {
-//   CHECK-DAG:   hal.executable.entry_point @bools_rgn_dispatch_0 attributes {
-//  CHECK-SAME:     interface = @legacy_io,
-//  CHECK-SAME:     ordinal = 0 : index,
-//  CHECK-SAME:     signature = (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1>
-//  CHECK-SAME:   }
-flow.executable @bools_ex_dispatch_0 {
-  flow.dispatch.entry @bools_rgn_dispatch_0 attributes {
-    workload = 4 : index
-  }
-  // CHECK: module {
-  module {
-    // CHECK-NEXT: func @bools_rgn_dispatch_0()
-    //  CHECK-DAG:   %[[ZERO:.+]] = constant 0 : index
-    //  CHECK-DAG:   %[[ARG0_I8:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<4xi8>
-    //  CHECK-DAG:   %[[ARG0_I1:.+]] = "mhlo.convert"(%[[ARG0_I8]]) : (tensor<4xi8>) -> tensor<4xi1>
-    //  CHECK-DAG:   %[[ARG1_I8:.+]] = hal.interface.load.tensor @legacy_io::@arg1, offset = %[[ZERO]] : tensor<4xi8>
-    //  CHECK-DAG:   %[[ARG1_I1:.+]] = "mhlo.convert"(%[[ARG1_I8]]) : (tensor<4xi8>) -> tensor<4xi1>
-    // CHECK-NEXT:   %[[RET0_I1:.+]] = call @bools_rgn_dispatch_0_impl(%[[ARG0_I1]], %[[ARG1_I1]]) : (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1>
-    // CHECK-NEXT:   %[[RET0_I8:.+]] = "mhlo.convert"(%[[RET0_I1]]) : (tensor<4xi1>) -> tensor<4xi8>
-    // CHECK-NEXT:   hal.interface.store.tensor %[[RET0_I8]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<4xi8>
-    // CHECK-NEXT:   return
-    // CHECK-NEXT: }
-    // CHECK-NEXT: func private @bools_rgn_dispatch_0_impl(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4xi1>
-    func @bools_rgn_dispatch_0(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4xi1> {
-      %0 = mhlo.and %arg0, %arg1 : tensor<4xi1>
-      %c = mhlo.constant dense<[false, false, true, false]> : tensor<4xi1>
-      %1 = mhlo.and %0, %c : tensor<4xi1>
-      return %1 : tensor<4xi1>
-    }
-    // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
-  }
-}
-
-// -----
-
-// CHECK-LABEL: hal.executable @shaped_dispatch
-//  CHECK-NEXT: hal.interface @legacy_io attributes {push_constants = 2 : index} {
-//  CHECK-NEXT:   hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-//  CHECK-NEXT:   hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
-//  CHECK-NEXT: }
-flow.executable @shaped_dispatch {
-  flow.dispatch.entry @entry
-  // CHECK: module {
-  module {
-    //      CHECK: func @entry() {
-    //  CHECK-NEXT:   %[[ZERO:.+]] = constant 0 : index
-    // Invariant: Constant loads emitted before binding (tensor) loads.
-    //  CHECK-NEXT:   %[[DIM0:.+]] = hal.interface.load.constant offset = 0 : index
-    //  CHECK-NEXT:   %[[DIM1:.+]] = hal.interface.load.constant offset = 1 : index
-    //  CHECK-NEXT:   %[[ARG0:.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = %[[ZERO]] : tensor<?x7x10xf32>
-    //  CHECK-NEXT:   %[[RET0:.+]] = call @entry_impl(%[[ARG0]], %[[DIM0]], %[[DIM1]]) : (tensor<?x7x10xf32>, index, index) -> tensor<7x?x10xf32>
-    //  CHECK-NEXT:   hal.interface.store.tensor %[[RET0]], @legacy_io::@ret0, offset = %[[ZERO]] : tensor<7x?x10xf32>
-    //  CHECK-NEXT:   return
-    //  CHECK-NEXT: }
-    //  CHECK-NEXT: func private @entry_impl
-    func @entry(%arg0: tensor<?x7x10xf32>, %arg1: index, %arg2: index) -> tensor<7x?x10xf32> {
-      %0 = shapex.make_ranked_shape %arg1 : (index) -> !shapex.ranked_shape<[?,7,10]>
-      %1 = shapex.make_ranked_shape %arg2 : (index) -> !shapex.ranked_shape<[7,?,10]>
-      %2 = shapex.tie_shape %arg0, %0 : tensor<?x7x10xf32>, !shapex.ranked_shape<[?,7,10]>
-      %3 = "mhlo.transpose"(%2) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<?x7x10xf32>) -> tensor<7x?x10xf32>
-      %4 = shapex.tie_shape %3, %1 : tensor<7x?x10xf32>, !shapex.ranked_shape<[7,?,10]>
-      return %4 : tensor<7x?x10xf32>
-    }
-  }
-}
diff --git a/iree/compiler/Translation/IREEVM.cpp b/iree/compiler/Translation/IREEVM.cpp
index 07dc6ac..e2028d5 100644
--- a/iree/compiler/Translation/IREEVM.cpp
+++ b/iree/compiler/Translation/IREEVM.cpp
@@ -142,15 +142,7 @@
     IREE::TFLite::buildTransformPassPipeline(passManager);
   }
   IREE::Flow::buildInputTransformPassPipeline(passManager);
-  // VMLA must go with legacy path.
-  bool dispatchLinalgOnTensors = true;
-  if (std::find(executableOptions.targets.begin(),
-                executableOptions.targets.end(),
-                "vmla") != executableOptions.targets.end()) {
-    dispatchLinalgOnTensors = false;
-  }
-  IREE::Flow::buildFlowTransformPassPipeline(passManager,
-                                             dispatchLinalgOnTensors);
+  IREE::Flow::buildFlowTransformPassPipeline(passManager);
   IREE::HAL::buildHALTransformPassPipeline(passManager, executableOptions);
   IREE::VM::buildVMTransformPassPipeline(passManager, targetOptions);
   passManager.addPass(mlir::iree_compiler::IREE::createDropCompilerHintsPass());
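Editorial aside on the hunk above, not part of the patch: with the VMLA special case gone, the Flow pipeline is built unconditionally on the Linalg-on-tensors path. Should a target-conditional branch like the removed one ever be needed again, a minimal sketch (hypothetical, assuming `executableOptions.targets` remains a range of target-name strings) using `llvm::is_contained` from llvm/ADT/STLExtras.h reads more directly than the `std::find` idiom that was deleted:

    #include "llvm/ADT/STLExtras.h"  // llvm::is_contained

    // Hypothetical equivalent of the removed membership test; shown only to
    // illustrate the idiom, since the VMLA branch itself no longer exists.
    bool dispatchLinalgOnTensors =
        !llvm::is_contained(executableOptions.targets, "vmla");

This keeps the check to a single expression and avoids spelling out begin()/end() iterators by hand.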
diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD
index 65fe4c5..6b326cd 100644
--- a/iree/test/e2e/regression/BUILD
+++ b/iree/test/e2e/regression/BUILD
@@ -42,7 +42,6 @@
             "dynamic_torch_index_select_negative.mlir",
             "dynamic_torch_index_select_scalar.mlir",
             "dynamic_torch_index_select_vector.mlir",
-            "executable_benchmark.mlir",
             "globals.mlir",
             "scalar.mlir",
             "trace_dispatch_tensors.mlir",
diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt
index 0c9985b..269a553 100644
--- a/iree/test/e2e/regression/CMakeLists.txt
+++ b/iree/test/e2e/regression/CMakeLists.txt
@@ -22,7 +22,6 @@
     "dynamic_torch_index_select_negative.mlir"
     "dynamic_torch_index_select_scalar.mlir"
     "dynamic_torch_index_select_vector.mlir"
-    "executable_benchmark.mlir"
     "globals.mlir"
     "scalar.mlir"
     "trace_dispatch_tensors.mlir"
diff --git a/iree/test/e2e/regression/executable_benchmark.mlir b/iree/test/e2e/regression/executable_benchmark.mlir
deleted file mode 100644
index 5490ec4..0000000
--- a/iree/test/e2e/regression/executable_benchmark.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// Only checks registered benchmarks.
-// RUN: iree-translate --iree-hal-target-backends=vmvx -iree-flow-export-benchmark-funcs -iree-mlir-to-vm-bytecode-module %s | iree-benchmark-module --driver=vmvx --benchmark_list_tests=true | IreeFileCheck %s
-
-func @two_dispatch() -> (tensor<5x5xf32>, tensor<3x5xf32>) attributes { iree.module.export } {
-  %0 = iree.unfoldable_constant dense<1.0> : tensor<5x3xf32>
-  %1 = iree.unfoldable_constant dense<0.4> : tensor<3x5xf32>
-  %2 = "mhlo.dot"(%0, %1) : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
-  %3 = "mhlo.dot"(%1, %2) : (tensor<3x5xf32>, tensor<5x5xf32>) -> tensor<3x5xf32>
-  return %2, %3 : tensor<5x5xf32>, tensor<3x5xf32>
-}
-// CHECK: BM_two_dispatch_dispatch_0_benchmark
-// CHECK: BM_two_dispatch_dispatch_1_benchmark
-// CHECK: BM_two_dispatch_benchmark
diff --git a/iree/tools/BUILD b/iree/tools/BUILD
index 7ff2cda..9238ef5 100644
--- a/iree/tools/BUILD
+++ b/iree/tools/BUILD
@@ -83,7 +83,6 @@
     deps = [
         "//iree/compiler/Bindings/Native/Transforms",
         "//iree/compiler/Bindings/TFLite/Transforms",
-        "//iree/compiler/Dialect/Flow/Analysis",
         "//iree/compiler/Dialect/Flow/IR",
         "//iree/compiler/Dialect/Flow/Transforms",
         "//iree/compiler/Dialect/HAL/IR:HALDialect",
diff --git a/iree/tools/CMakeLists.txt b/iree/tools/CMakeLists.txt
index b8bed15..44c1d92 100644
--- a/iree/tools/CMakeLists.txt
+++ b/iree/tools/CMakeLists.txt
@@ -153,7 +153,6 @@
       MLIRIR
       iree::compiler::Bindings::Native::Transforms
       iree::compiler::Bindings::TFLite::Transforms
-      iree::compiler::Dialect::Flow::Analysis
       iree::compiler::Dialect::Flow::IR
       iree::compiler::Dialect::Flow::Transforms
       iree::compiler::Dialect::HAL::IR::HALDialect
diff --git a/iree/tools/init_iree_passes.h b/iree/tools/init_iree_passes.h
index b52f61b..d3a283e 100644
--- a/iree/tools/init_iree_passes.h
+++ b/iree/tools/init_iree_passes.h
@@ -24,7 +24,6 @@
 
 #include "iree/compiler/Bindings/Native/Transforms/Passes.h"
 #include "iree/compiler/Bindings/TFLite/Transforms/Passes.h"
-#include "iree/compiler/Dialect/Flow/Analysis/TestPasses.h"
 #include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
 #include "iree/compiler/Dialect/HAL/Transforms/Passes.h"
 #include "iree/compiler/Dialect/IREE/Transforms/Passes.h"
@@ -47,7 +46,6 @@
   IREE::TFLite::registerTransformPassPipeline();
 
   IREE::Flow::registerFlowPasses();
-  IREE::Flow::registerFlowAnalysisTestPasses();
   IREE::HAL::registerHALPasses();
   IREE::registerTransformPasses();
   Shape::registerShapeConversionPasses();