Move peeling from CodegenStrategy to an LLVMCPU pass. (#12707)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
index 81205f7..f7b894c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
@@ -27,6 +27,7 @@
         "LLVMCPULowerToUKernels.cpp",
         "LLVMCPUMaterializeEncodingPass.cpp",
         "LLVMCPUMmt4dVectorLowering.cpp",
+        "LLVMCPUPeel.cpp",
         "LLVMCPUSynchronizeSymbolVisibility.cpp",
         "LLVMCPUTensorPad.cpp",
         "LLVMCPUTileAndFuse.cpp",
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
index 44a0886..797e9f9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
@@ -31,6 +31,7 @@
     "LLVMCPULowerToUKernels.cpp"
     "LLVMCPUMaterializeEncodingPass.cpp"
     "LLVMCPUMmt4dVectorLowering.cpp"
+    "LLVMCPUPeel.cpp"
     "LLVMCPUSynchronizeSymbolVisibility.cpp"
     "LLVMCPUTensorPad.cpp"
     "LLVMCPUTileAndFuse.cpp"
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp
new file mode 100644
index 0000000..534962d
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp
@@ -0,0 +1,87 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/PassDetail.h"
+#include "iree/compiler/Codegen/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Dialect/SCF/Transforms/Transforms.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#define DEBUG_TYPE "iree-llvmcpu-peel"
+
+namespace mlir {
+namespace iree_compiler {
+namespace {
+// Appends the scf.for loops enclosing `linalgOp` to `loopsToPeel`, outermost
+// first, stopping at the first tiled-and-distributed loop or after
+// getNumLoops() loops. No-op without a lowering config; `rewriter` is unused.
+void collectLoopsToPeel(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
+                        SmallVectorImpl<scf::ForOp> &loopsToPeel) {
+  // Check for null first so the config lookup never receives a null op.
+  if (!linalgOp || !iree_compiler::getLoweringConfig(linalgOp)) return;
+
+  const auto maxNumLoopsToPeel = linalgOp.getNumLoops();
+  Operation *currentOp = linalgOp;
+  for (unsigned i = 0; i < maxNumLoopsToPeel; ++i) {
+    scf::ForOp loop = currentOp->getParentOfType<scf::ForOp>();
+    if (!loop || iree_compiler::isTiledAndDistributedLoop(loop)) break;
+    loopsToPeel.push_back(loop);
+    currentOp = loop;
+  }
+
+  // The walk above collects innermost-first; flip to outermost-first.
+  std::reverse(loopsToPeel.begin(), loopsToPeel.end());
+}
+
+// Pass peeling the non-distributed loops that enclose linalg ops.
+struct LLVMCPUPeelPass : public LLVMCPUPeelBase<LLVMCPUPeelPass> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<linalg::LinalgDialect, scf::SCFDialect,
+                    tensor::TensorDialect>();
+  }
+  void runOnOperation() override;
+};
+
+// Peels the loops picked by collectLoopsToPeel() around every linalg op in the
+// function, then runs cleanup patterns; a cleanup failure fails the pass.
+void LLVMCPUPeelPass::runOnOperation() {
+  auto funcOp = getOperation();
+  MLIRContext *ctx = &getContext();
+  SmallVector<linalg::LinalgOp> worklist;
+  funcOp.walk([&](linalg::LinalgOp op) { worklist.push_back(op); });
+  for (linalg::LinalgOp target : worklist) {
+    LLVM_DEBUG(llvm::dbgs() << "candidate: " << target << "\n");
+    IRRewriter rewriter(ctx);
+    IRRewriter::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointAfter(target);
+    SmallVector<scf::ForOp> enclosingLoops;
+    collectLoopsToPeel(rewriter, target, enclosingLoops);
+    linalg::peelLoops(rewriter, enclosingLoops);
+  }
+
+  RewritePatternSet cleanup(ctx);
+  linalg::populateLinalgTilingCanonicalizationPatterns(cleanup);
+  scf::populateSCFForLoopCanonicalizationPatterns(cleanup);
+  memref::populateResolveRankedShapeTypeResultDimsPatterns(cleanup);
+  ctx->getLoadedDialect<tensor::TensorDialect>()
+      ->getCanonicalizationPatterns(cleanup);
+  if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(cleanup)))) {
+    LLVM_DEBUG(llvm::dbgs() << "----- cleanup failed -----\n");
+    signalPassFailure();
+  }
+}
+}  // namespace
+
+std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUPeelPass() {
+  return std::make_unique<LLVMCPUPeelPass>();  // Declared in Codegen/Passes.h.
+}
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index e20146f..d2204c4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -332,14 +332,19 @@
                                              bool enableVectorMasking) {
   addTileAndDistributePasses(passManager);
 
-  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
-  {
     // Skip tiling reduction loops because this is expected to apply on copy ops
     // only.
+  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+  {
     LinalgSingleTilingExpertPassOptions options;
     options.tilingLevel =
         static_cast<int64_t>(StrategyTilingLevel::ParallelTiles);
-    options.peel = true;
+    nestedModulePM.addNestedPass<func::FuncOp>(
+        createLinalgSingleTilingExpertPass(options));
+  }
+  nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUPeelPass());
+  {
+    LinalgSingleTilingExpertPassOptions options;
     options.vectorize = true;
     options.enableVectorMasking = enableVectorMasking;
     nestedModulePM.addNestedPass<func::FuncOp>(
@@ -480,9 +485,12 @@
     nestedModulePM.addNestedPass<func::FuncOp>(createVectorizePadPass());
   }
 
+  if (enablePeeling) {
+    nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUPeelPass());
+  }
+
   {
     LinalgSingleTilingExpertPassOptions options;
-    options.peel = enablePeeling;
     options.vectorize = true;
     options.enableVectorMasking = enableVectorMasking;
     nestedModulePM.addNestedPass<func::FuncOp>(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
index 5451970..83990c3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
@@ -41,6 +41,7 @@
             "materialize_vmvx_launch_configuration.mlir",
             "materialize_x86_64_launch_configuration.mlir",
             "pad_conv_pipeline_tests.mlir",
+            "peel.mlir",
             "peel_and_vectorize.mlir",
             "pipeline_tests.mlir",
             "split_reduction_pipeline_tests.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
index 9514f4f..0ad36c6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
@@ -36,6 +36,7 @@
     "materialize_vmvx_launch_configuration.mlir"
     "materialize_x86_64_launch_configuration.mlir"
     "pad_conv_pipeline_tests.mlir"
+    "peel.mlir"
     "peel_and_vectorize.mlir"
     "pipeline_tests.mlir"
     "split_reduction_pipeline_tests.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir
new file mode 100644
index 0000000..65462a5
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir
@@ -0,0 +1,74 @@
+// RUN: iree-opt --iree-llvmcpu-peel -split-input-file %s | FileCheck %s
+
+func.func @peel_static_matmul() {
+  %c16 = arith.constant 16 : index
+  %c49 = arith.constant 49 : index
+  %c8 = arith.constant 8 : index
+  %c32 = arith.constant 32 : index
+  %c0 = arith.constant 0 : index
+  %c512 = arith.constant 512 : index
+  %c128 = arith.constant 128 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<readonly:tensor<128x49xf32>>
+  %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : !flow.dispatch.tensor<readonly:tensor<49x512xf32>>
+  %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+  %workgroup_id_x = hal.interface.workgroup.id[0] : index
+  %workgroup_count_x = hal.interface.workgroup.count[0] : index
+  %workgroup_id_y = hal.interface.workgroup.id[1] : index
+  %workgroup_count_y = hal.interface.workgroup.count[1] : index
+  %3 = affine.apply affine_map<()[s0] -> (s0 * 65)>()[%workgroup_id_y]
+  %4 = affine.apply affine_map<()[s0] -> (s0 * 65)>()[%workgroup_count_y]
+  %5 = affine.apply affine_map<()[s0] -> (s0 * 65)>()[%workgroup_id_x]
+  %6 = affine.apply affine_map<()[s0] -> (s0 * 65)>()[%workgroup_count_x]
+  scf.for %arg0 = %3 to %c128 step %4 {
+    %7 = affine.min affine_map<(d0) -> (-d0 + 128, 65)>(%arg0)
+    %8 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [%7, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x49xf32>> -> tensor<?x49xf32>
+    scf.for %arg1 = %5 to %c512 step %6 {
+      %9 = affine.min affine_map<(d0) -> (-d0 + 512, 65)>(%arg1)
+      %10 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%7, %9], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>> -> tensor<?x?xf32>
+      %11 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [49, %9], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<49x512xf32>> -> tensor<49x?xf32>
+      %12 = scf.for %arg2 = %c0 to %7 step %c8 iter_args(%arg3 = %10) -> (tensor<?x?xf32>) {
+        %13 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 8)>(%arg2)[%7]
+        %extracted_slice = tensor.extract_slice %8[%arg2, 0] [%13, 49] [1, 1] : tensor<?x49xf32> to tensor<?x49xf32>
+        %14 = scf.for %arg4 = %c0 to %9 step %c32 iter_args(%arg5 = %arg3) -> (tensor<?x?xf32>) {
+          %15 = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 32)>(%arg4)[%9]
+          %extracted_slice_0 = tensor.extract_slice %11[0, %arg4] [49, %15] [1, 1] : tensor<49x?xf32> to tensor<49x?xf32>
+          %extracted_slice_1 = tensor.extract_slice %arg5[%arg2, %arg4] [%13, %15] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+          %extracted_slice_2 = tensor.extract_slice %extracted_slice_1[0, 0] [%13, %15] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+          %16 = linalg.fill ins(%cst : f32) outs(%extracted_slice_2 : tensor<?x?xf32>) -> tensor<?x?xf32>
+          %extracted_slice_3 = tensor.extract_slice %16[0, 0] [%13, %15] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+          %17 = scf.for %arg6 = %c0 to %c49 step %c16 iter_args(%arg7 = %extracted_slice_3) -> (tensor<?x?xf32>) {
+            %18 = affine.min affine_map<(d0) -> (-d0 + 49, 16)>(%arg6)
+            %extracted_slice_5 = tensor.extract_slice %extracted_slice[0, %arg6] [%13, %18] [1, 1] : tensor<?x49xf32> to tensor<?x?xf32>
+            %extracted_slice_6 = tensor.extract_slice %extracted_slice_0[%arg6, 0] [%18, %15] [1, 1] : tensor<49x?xf32> to tensor<?x?xf32>
+            %19 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[65, 65, 0], [8, 32, 0], [0, 0, 16]]>} ins(%extracted_slice_5, %extracted_slice_6 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg7 : tensor<?x?xf32>) -> tensor<?x?xf32>
+            scf.yield %19 : tensor<?x?xf32>
+          }
+          %inserted_slice = tensor.insert_slice %17 into %16[0, 0] [%13, %15] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+          %inserted_slice_4 = tensor.insert_slice %inserted_slice into %arg5[%arg2, %arg4] [%13, %15] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+          scf.yield %inserted_slice_4 : tensor<?x?xf32>
+        }
+        scf.yield %14 : tensor<?x?xf32>
+      }
+      flow.dispatch.tensor.store %12, %2, offsets = [%arg0, %arg1], sizes = [%7, %9], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @peel_static_matmul
+// CHECK:         scf.for
+// CHECK:           scf.for
+// CHECK:             scf.for
+// CHECK:               scf.for
+// CHECK:                 linalg.fill {{.*}} -> tensor<8x32xf32>
+// CHECK:                 %[[T0:.+]] = scf.for
+// CHECK:                   linalg.matmul {{.*}} tensor<8x32xf32>
+// CHECK:                 linalg.matmul {{.*}} outs(%[[T0]] : tensor<8x32xf32>) -> tensor<8x32xf32>
+// CHECK:               scf.for
+// CHECK:                 linalg.fill {{.*}} -> tensor<8x?xf32>
+// CHECK:                 %[[T1:.+]] = scf.for
+// CHECK:                   linalg.matmul {{.*}} tensor<8x?xf32>
+// CHECK:               scf.for
+// CHECK:                 linalg.fill {{.*}} -> tensor<?x?xf32>
+// CHECK:                 %[[T2:.+]] = scf.for
+// CHECK:                   linalg.matmul {{.*}} tensor<?x?xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Passes.h b/compiler/src/iree/compiler/Codegen/Passes.h
index 330c153..75fa7db 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Passes.h
@@ -300,6 +300,9 @@
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTensorPadPass(
     LLVMCPUTensorPadOption option = LLVMCPUTensorPadOption::ParallelDims);
 
+/// Pass to perform peeling on non-distributed loops.
+std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUPeelPass();
+
 /// Performs the final conversion to LLVM dialect.
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
     bool reassociateFpReordering = false);
diff --git a/compiler/src/iree/compiler/Codegen/Passes.td b/compiler/src/iree/compiler/Codegen/Passes.td
index 5bbe759..679d04f 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Passes.td
@@ -398,6 +398,13 @@
       "mlir::iree_compiler::createLLVMCPUTensorPadPass()";
 }
 
+def LLVMCPUPeel :
+    Pass<"iree-llvmcpu-peel", "func::FuncOp"> {
+  let summary = "Pass to perform peeling on non-distributed loops.";
+  let constructor =
+      "mlir::iree_compiler::createLLVMCPUPeelPass()";
+}
+
 def ConvertToLLVM :
     Pass<"iree-convert-to-llvm", "ModuleOp"> {
   let summary =
diff --git a/compiler/src/iree/compiler/Codegen/Sandbox/LinalgTensorCodegenDriver.cpp b/compiler/src/iree/compiler/Codegen/Sandbox/LinalgTensorCodegenDriver.cpp
index 6c293ac..013629a 100644
--- a/compiler/src/iree/compiler/Codegen/Sandbox/LinalgTensorCodegenDriver.cpp
+++ b/compiler/src/iree/compiler/Codegen/Sandbox/LinalgTensorCodegenDriver.cpp
@@ -29,7 +29,6 @@
 // using namespace mlir::linalg;
 
 using mlir::iree_compiler::IREE::LinalgExt::CodegenStrategy;
-using mlir::iree_compiler::IREE::LinalgExt::LinalgPeelOptions;
 using mlir::iree_compiler::IREE::LinalgExt::LinalgTransformationFilter;
 using mlir::iree_compiler::IREE::LinalgExt::LinalgTransforms;
 using mlir::iree_compiler::IREE::LinalgExt::LinalgVectorizationOptions;
@@ -363,7 +362,6 @@
     this->generalize = options.generalize;
     this->iteratorInterchange = options.iteratorInterchange;
     this->decomposeToLowerDimOp = options.decomposeToLowerDimOp;
-    this->peel = options.peel;
     this->vectorize = options.vectorize;
     this->enableVectorMasking = options.enableVectorMasking;
     this->vectorizePadding = options.vectorizePadding;
@@ -727,30 +725,6 @@
       SmallVector<int64_t>{hoistPaddings.begin(), hoistPaddings.end()});
   paddingOptions.setTransposePaddings(transposePaddingVectors);
 
-  // Gather tiled loops that aren't distribution loops from previous tiling
-  // stages.
-  LinalgPeelOptions peelingOptions;
-  peelingOptions.loopsToPeelComputationFunction =
-      [](OpBuilder &builder, Operation *op,
-         SmallVectorImpl<scf::ForOp> &loopsToPeel) {
-        if (!iree_compiler::getLoweringConfig(op)) return;
-        auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
-        if (!linalgOp) return;
-
-        auto maxNumLoopsToPeel = linalgOp.getNumLoops();
-        Operation *currentOp = op;
-        for (int i = 0; i < maxNumLoopsToPeel; ++i) {
-          currentOp = currentOp->getParentOfType<scf::ForOp>();
-          auto loop = llvm::cast_or_null<scf::ForOp>(currentOp);
-          if (!loop || iree_compiler::isTiledAndDistributedLoop(loop)) {
-            break;
-          }
-          loopsToPeel.push_back(loop);
-        }
-
-        std::reverse(loopsToPeel.begin(), loopsToPeel.end());
-      };
-
   LinalgVectorizationOptions vectorizationOptions;
   vectorizationOptions.setVectorizePadding(vectorizePadding);
   vectorizationOptions.setEnableVectorMasking(enableVectorMasking);
@@ -765,7 +739,6 @@
   strategy.tileIf(doTiling, anchorOpName, tilingOptions)
       .padIf(pad, anchorOpName, paddingOptions)
       .decomposeIf(decomposeToLowerDimOp)
-      .peelIf(peel, generalize ? genericOpName : anchorOpName, peelingOptions)
       .vectorizeIf(vectorize, generalize ? genericOpName : anchorOpName,
                    vectorizationOptions);
 
diff --git a/compiler/src/iree/compiler/Codegen/Sandbox/Passes.h b/compiler/src/iree/compiler/Codegen/Sandbox/Passes.h
index 9c4c954..2ca9da9 100644
--- a/compiler/src/iree/compiler/Codegen/Sandbox/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Sandbox/Passes.h
@@ -56,7 +56,6 @@
   bool generalize = false;
   SmallVector<int64_t> iteratorInterchange = {};
   bool decomposeToLowerDimOp = false;
-  bool peel = false;
   bool vectorize = false;
   bool enableVectorMasking = false;
   bool vectorizePadding = false;
diff --git a/compiler/src/iree/compiler/Codegen/Sandbox/Passes.td b/compiler/src/iree/compiler/Codegen/Sandbox/Passes.td
index 0834572..03188a6 100644
--- a/compiler/src/iree/compiler/Codegen/Sandbox/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Sandbox/Passes.td
@@ -131,10 +131,6 @@
       /*default=*/"false",
       "Convert named operations to lower-D named operations.">,
 
-    // Peeling options.
-    Option<"peel", "peel", "bool", /*default=*/"false",
-      "Peel loops enclosing the linalg op.">,
-
     // Vectorization options.
     Option<"vectorize", "vectorize", "bool", /*default=*/"false",
       "Rewrite the linalg op as a vector operation.">,
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.h b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.h
index 7b3ecd8..df5e1f5 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.h
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.h
@@ -239,18 +239,6 @@
     const LinalgExt::LinalgTransformationFilter &filter =
         LinalgExt::LinalgTransformationFilter());
 
-/// Create a LinalgStrategyPeelPass.
-using LoopsToPeelComputationFunction = std::function<void(
-    OpBuilder &, Operation *, SmallVectorImpl<scf::ForOp> &)>;
-
-struct LinalgPeelOptions {
-  LoopsToPeelComputationFunction loopsToPeelComputationFunction = nullptr;
-};
-std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPeelPass(
-    StringRef opName = "", const LinalgPeelOptions &opt = LinalgPeelOptions(),
-    const LinalgExt::LinalgTransformationFilter &filter =
-        LinalgExt::LinalgTransformationFilter());
-
 /// Create a LinalgStrategyVectorizePass.
 using VectorSizeComputationFunction =
     std::function<SmallVector<int64_t>(linalg::LinalgOp, ArrayRef<int64_t>)>;
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.td
index 14d7292..2fa4c97 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.td
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Passes/Passes.td
@@ -156,22 +156,6 @@
   ];
 }
 
-def LinalgStrategyPeelPass
-    : Pass<"iree-linalg-strategy-peel-pass", "func::FuncOp"> {
-  let summary = "Configurable pass to apply pattern-based linalg peeling.";
-  let constructor = "createLinalgStrategyPeelPass()";
-  let dependentDialects = [
-    "linalg::LinalgDialect",
-    "scf::SCFDialect"
-  ];
-  let options = [
-    Option<"anchorFuncName", "anchor-func", "std::string", /*default=*/"",
-      "Which func op is the anchor to latch on.">,
-    Option<"anchorOpName", "anchor-op", "std::string", /*default=*/"",
-      "Which linalg op within the func is the anchor to latch on.">,
-  ];
-}
-
 def LinalgStrategyVectorizePass
     : Pass<"iree-linalg-strategy-vectorize-pass", "func::FuncOp"> {
   let summary = "Configurable pass to apply pattern-based linalg vectorization.";
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Transforms/CodegenStrategy.h b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Transforms/CodegenStrategy.h
index 5147557..798b909 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Transforms/CodegenStrategy.h
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/Transforms/CodegenStrategy.h
@@ -104,28 +104,6 @@
   }
 };
 
-/// Represent one application of createLinalgStrategyPeelPass.
-struct Peel : public Transformation {
-  explicit Peel(
-      LinalgPeelOptions options,
-      LinalgExt::LinalgTransformationFilter::FilterFunction f = nullptr)
-      : Transformation(std::move(f)), options(options) {}
-
-  Peel(StringRef name, LinalgPeelOptions options,
-       LinalgExt::LinalgTransformationFilter::FilterFunction f = nullptr)
-      : Transformation(std::move(f)), opName(name), options(options) {}
-
-  void
-  addToPassPipeline(OpPassManager &pm,
-                    LinalgExt::LinalgTransformationFilter m) const override {
-    pm.addPass(createLinalgStrategyPeelPass(opName, options, m));
-  }
-
-private:
-  std::string opName;
-  LinalgPeelOptions options;
-};
-
 /// Represent one application of createLinalgStrategyVectorizePass.
 struct Vectorize : public Transformation {
   explicit Vectorize(
@@ -231,21 +209,6 @@
       LinalgExt::LinalgTransformationFilter::FilterFunction f = nullptr) {
     return b ? decompose(std::move(f)) : *this;
   }
-  /// Append a pattern to peel 'LinalgOpType'.
-  CodegenStrategy &
-  peel(StringRef opName, const LinalgPeelOptions &options,
-       const LinalgExt::LinalgTransformationFilter::FilterFunction &f =
-           nullptr) {
-    transformationSequence.emplace_back(
-        std::make_unique<Peel>(opName, options, f));
-    return *this;
-  }
-  /// Conditionally append a pattern to peel 'LinalgOpType'.
-  CodegenStrategy &
-  peelIf(bool b, StringRef opName, const LinalgPeelOptions &options,
-         LinalgExt::LinalgTransformationFilter::FilterFunction f = nullptr) {
-    return b ? peel(opName, options, std::move(f)) : *this;
-  }
   /// Append a pattern to rewrite `LinalgOpType` as a vector operation.
   CodegenStrategy &vectorize(
       StringRef opName,
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
index 3efba4c..17a670a 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
@@ -342,75 +342,6 @@
 }
 
 namespace {
-
-///
-/// Linalg peeling patterns.
-///
-
-/// Compute the loops to peel and return them in a SmallVector. Loops will be
-/// peeled in order of appearance in the SmallVector. This order will impact the
-/// output IR. If an inner-to-outer order is provided, the peeled iterations of
-/// the outer loops will also contain the peeled inner loops. If an
-/// outer-to-inner order is provided, the peeled iterations of the outer loops
-/// will not contain any peeled inner loops.
-
-/// `filter` controls LinalgTransformMarker matching and update when specified.
-struct LinalgPeelingPattern
-    : public OpInterfaceRewritePattern<linalg::LinalgOp> {
-  /// Construct a generic pattern applied to all LinalgOp that verify `filter`.
-  LinalgPeelingPattern(MLIRContext *context,
-                       LinalgExt::LinalgTransformationFilter f =
-                           LinalgExt::LinalgTransformationFilter(),
-                       LinalgPeelOptions options = LinalgPeelOptions(),
-                       PatternBenefit benefit = 1);
-
-  /// Construct a pattern specifically applied to `opName`.
-  LinalgPeelingPattern(StringRef opName, MLIRContext *context,
-                       LinalgPeelOptions options = LinalgPeelOptions(),
-                       LinalgExt::LinalgTransformationFilter f =
-                           LinalgExt::LinalgTransformationFilter(),
-                       PatternBenefit benefit = 1);
-
-  LogicalResult matchAndRewrite(linalg::LinalgOp linalgOp,
-                                PatternRewriter &rewriter) const override;
-
-private:
-  /// LinalgTransformMarker handles special attribute manipulations.
-  const LinalgExt::LinalgTransformationFilter filter;
-  /// Peeling options.
-  const LinalgPeelOptions options;
-};
-
-LinalgPeelingPattern::LinalgPeelingPattern(
-    MLIRContext *context, LinalgExt::LinalgTransformationFilter f,
-    LinalgPeelOptions options, PatternBenefit benefit)
-    : OpInterfaceRewritePattern<linalg::LinalgOp>(context, benefit),
-      filter(std::move(f)), options(std::move(options)) {}
-
-LinalgPeelingPattern::LinalgPeelingPattern(
-    StringRef opName, MLIRContext *context, LinalgPeelOptions options,
-    LinalgExt::LinalgTransformationFilter f, PatternBenefit benefit)
-    : OpInterfaceRewritePattern<linalg::LinalgOp>(context, benefit),
-      filter(f.addOpNameFilter(opName)), options(std::move(options)) {}
-
-LogicalResult
-LinalgPeelingPattern::matchAndRewrite(linalg::LinalgOp linalgOp,
-                                      PatternRewriter &rewriter) const {
-  if (failed(filter.checkAndNotify(rewriter, linalgOp)))
-    return failure();
-
-  // Increase marker counter even if peeling doesn't happen for this op.
-  filter.replaceLinalgTransformationFilter(rewriter, linalgOp);
-
-  if (!options.loopsToPeelComputationFunction)
-    return failure();
-
-  SmallVector<scf::ForOp, 4> loopsToPeel;
-  options.loopsToPeelComputationFunction(rewriter, linalgOp, loopsToPeel);
-  linalg::peelLoops(rewriter, loopsToPeel);
-  return success();
-}
-
 /// Configurable pass to apply pattern-based tiling and fusion.
 struct LinalgStrategyTileAndFusePass
     : public LinalgStrategyTileAndFusePassBase<LinalgStrategyTileAndFusePass> {
@@ -534,40 +465,6 @@
   LinalgExt::LinalgTransformationFilter filter;
 };
 
-/// Configurable pass to apply pattern-based linalg peeling.
-struct LinalgStrategyPeelPass
-    : public LinalgStrategyPeelPassBase<LinalgStrategyPeelPass> {
-
-  LinalgStrategyPeelPass() = default;
-
-  LinalgStrategyPeelPass(StringRef opName, LinalgPeelOptions opt,
-                         LinalgExt::LinalgTransformationFilter filt)
-      : options(std::move(opt)), filter(std::move(filt)) {
-    this->anchorOpName.setValue(opName.str());
-  }
-
-  void runOnOperation() override {
-    auto funcOp = getOperation();
-    if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName)
-      return;
-
-    RewritePatternSet peelingPatterns(funcOp.getContext());
-    if (!anchorOpName.empty()) {
-      peelingPatterns.add<LinalgPeelingPattern>(
-          anchorOpName, funcOp.getContext(), options, filter);
-    } else {
-      peelingPatterns.add<LinalgPeelingPattern>(funcOp.getContext(), filter,
-                                                options);
-    }
-    if (failed(
-            applyPatternsAndFoldGreedily(funcOp, std::move(peelingPatterns))))
-      return signalPassFailure();
-  }
-
-  LinalgPeelOptions options;
-  LinalgExt::LinalgTransformationFilter filter;
-};
-
 /// Configurable pass to apply pattern-based linalg vectorization.
 struct LinalgStrategyVectorizePass
     : public LinalgStrategyVectorizePassBase<LinalgStrategyVectorizePass> {
@@ -796,13 +693,6 @@
   return std::make_unique<LinalgStrategyDecomposePass>(filter);
 }
 
-/// Create a LinalgStrategyPeelPass.
-std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPeelPass(
-    StringRef opName, const LinalgPeelOptions &opt,
-    const LinalgExt::LinalgTransformationFilter &filter) {
-  return std::make_unique<LinalgStrategyPeelPass>(opName, opt, filter);
-}
-
 /// Create a LinalgStrategyVectorizePass.
 std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyVectorizePass(
     StringRef opName, const LinalgVectorizationOptions &options,