[DT] Retire UpperBoundTileSizeOp op and relevant passes. (#18045)

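This is a follow-up to
https://github.com/iree-org/iree/commit/9aaae342bde3a469e5156214669866fd4ba57531.
The op is no longer needed, so this removes the op together with the CPU
materialization pass and the lowering pattern that handled it.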

Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
index 4edaffa..cb46bd5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
@@ -443,55 +443,6 @@
       };
 }
 
-// Like getMaterializeEncodingFn, but iterating over an array of targets and
-// returning the max of all tile sizes from each target, checking that other
-// materialization info (permutations) agree.
-//
-// This is useful to compute padding amounts, in the materialization of
-// UpperBoundTileSizeOp, in top-level functions that are not part of one HAL
-// executable variant. There, the padding amounts only control the size of
-// allocated buffers, so it's OK to over-estimate (only wasting some memory)
-// but not under-estimate (would cause buffer overruns) padding amounts.
-static MaterializeEncodingFn getUpperBoundMaterializeEncodingFn(
-    ArrayRef<IREE::HAL::ExecutableTargetAttr> targetAttrs) {
-  return
-      [targetAttrs](
-          RankedTensorType tensorType) -> FailureOr<MaterializeEncodingInfo> {
-        FailureOr<MaterializeEncodingInfo> result; // Defaults to failure.
-        for (auto targetAttr : targetAttrs) {
-          FailureOr<MaterializeEncodingInfo> info =
-              materializeEncodingForTarget(tensorType, targetAttr);
-          if (failed(info)) {
-            // No info at this iteration. Ignore and continue.
-            continue;
-          }
-          if (failed(result)) {
-            // No preexisting result. Use this iteration's info and continue.
-            result = info;
-            continue;
-          }
-          // Merge this iteration's info into preexisting result info.
-          // Check that permutations match, then record the max of tile sizes.
-          if (info->innerDimsPos != result->innerDimsPos ||
-              info->outerDimsPerm != result->outerDimsPerm) {
-            return failure();
-          }
-          if (info->innerTileSizes.size() != result->innerTileSizes.size()) {
-            return failure();
-          }
-          for (unsigned i = 0; i < info->innerTileSizes.size(); ++i) {
-            if (ShapedType::isDynamic(info->innerTileSizes[i])) {
-              result->innerTileSizes[i] = ShapedType::kDynamic;
-            } else {
-              result->innerTileSizes[i] =
-                  std::max(result->innerTileSizes[i], info->innerTileSizes[i]);
-            }
-          }
-        }
-        return result;
-      };
-}
-
 static FailureOr<MaterializeEncodingValueInfo>
 chooseDynamicEncodingInfoVMVXMicrokernels(RankedTensorType tensorType,
                                           OpBuilder &builder, Location loc) {
@@ -665,66 +616,4 @@
   return std::make_unique<CPUMaterializeDeviceEncodingPass>();
 }
 
-// NOTE: this runs on host modules.
-struct CPUMaterializeUpperBoundTileSizePass
-    : public CPUMaterializeUpperBoundTileSizeBase<
-          CPUMaterializeUpperBoundTileSizePass> {
-  CPUMaterializeUpperBoundTileSizePass() = default;
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<arith::ArithDialect>();
-  }
-
-  void runOnOperation() override {
-    auto moduleOp = getOperation();
-
-    // Run required analysis passes.
-    IREE::Stream::AffinityAnalysis affinityAnalysis(moduleOp);
-    if (failed(affinityAnalysis.run())) {
-      return signalPassFailure();
-    }
-    IREE::HAL::DeviceAnalysis deviceAnalysis(moduleOp);
-    if (failed(deviceAnalysis.run())) {
-      return signalPassFailure();
-    }
-
-    for (auto funcOp : moduleOp.getOps<FunctionOpInterface>()) {
-      // Gather the required executable targets for the function. Note that it's
-      // possible there are more required for ops nested within the function but
-      // this pass is a hack and can't handle that :shrug:.
-      auto executableTargets = getFuncExecutableTargetAttrs(
-          funcOp, affinityAnalysis, deviceAnalysis);
-      if (!executableTargets) {
-        funcOp.emitOpError()
-            << "could not determine executable targets for the function";
-        return signalPassFailure();
-      } else if (executableTargets->empty()) {
-        // Probably no tensors.
-        continue;
-      }
-
-      // Get patterns specialized for the executable targets used by the
-      // function.
-      RewritePatternSet patterns(&getContext());
-      MaterializeEncodingFn materializeEncodingFn =
-          getUpperBoundMaterializeEncodingFn(executableTargets->getArrayRef());
-      if (!materializeEncodingFn)
-        return signalPassFailure();
-      populateMaterializeUpperBoundTileSizePatterns(patterns,
-                                                    materializeEncodingFn);
-
-      // Run patterns on the function.
-      if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) {
-        funcOp.emitOpError(
-            "encoding padding sizes materialization pattern failed");
-        return signalPassFailure();
-      }
-    }
-  }
-};
-
-std::unique_ptr<Pass> createCPUMaterializeUpperBoundTileSizePass() {
-  return std::make_unique<CPUMaterializeUpperBoundTileSizePass>();
-}
-
 } // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h
index f5f9a31..e6e39a0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h
@@ -26,23 +26,6 @@
 std::unique_ptr<Pass> createCPUMaterializeHostEncodingPass();
 std::unique_ptr<Pass> createCPUMaterializeDeviceEncodingPass();
 
-/// Like createLLVMCPUMaterializeEncodingPass, but specifically for
-/// encoding.upper_bound_tile_size, converting it to constants.
-///
-/// Unlike createLLVMCPUMaterializeEncodingPass, this does not require the
-/// op to have a specific HAL target attribute. Instead, this will iterate over
-/// all HAL target attributes, use the maximum of all padding sizes from each
-/// target. This is needed because in top-level functions outside of HAL
-/// executables, there are upper_bound_tile_size ops (created by SetEncoding,
-/// and computing buffer allocation sizes) and there isn't one specific HAL
-/// target.
-///
-/// In the VMVX case where padding sizes are not compile-time constants, this
-/// converts upper_bound_tile_size to some specific constant size (currently 16)
-/// that is the largest tile size that we can use in VMVX, and can be adjusted
-// as needed.
-std::unique_ptr<Pass> createCPUMaterializeUpperBoundTileSizePass();
-
 /// Adds CPU bufferization passes to the pipeline.
 void addCPUBufferizePasses(OpPassManager &funcPassManager);
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td
index 6329c53..dd120bb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td
@@ -25,12 +25,6 @@
   let constructor = "mlir::iree_compiler::createCPUMaterializeDeviceEncodingPass()";
 }
 
-def CPUMaterializeUpperBoundTileSize :
-    Pass<"iree-codegen-cpu-materialize-upper-bound-tile-size", "mlir::ModuleOp"> {
-  let summary = "Materialize upper_bound_tile_size to constants.";
-  let constructor = "mlir::iree_compiler::createCPUMaterializeUpperBoundTileSizePass()";
-}
-
 def CPULowerToUKernels :
     Pass<"iree-codegen-cpu-lower-to-ukernels", ""> {
   let summary =
diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
index 42b4438..6312d80 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
@@ -96,9 +96,6 @@
     MaterializeEncodingTypeConverter &typeConverter,
     MaterializeEncodingValueFn materializeEncodingValueFn);
 
-void populateMaterializeUpperBoundTileSizePatterns(
-    RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn);
-
 // Returns true if `encoding` represents a narrow-N matmul RESULT, e.g. the
 // result of a matvec.
 bool isNarrowNResult(IREE::Encoding::EncodingAttr encoding);
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
index 6fc4dbd..08d1bb1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
@@ -54,18 +54,6 @@
       return signalPassFailure();
     }
 
-    {
-      RewritePatternSet patterns(context);
-      populateMaterializeUpperBoundTileSizePatterns(patterns,
-                                                    materializeEncodingFn);
-      if (failed(
-              applyPatternsAndFoldGreedily(operation, std::move(patterns)))) {
-        operation.emitOpError(
-            "encoding padding sizes materialization pattern failed");
-        return signalPassFailure();
-      }
-    }
-
     // Add patterns to resolve dims ops and cleanups.
     {
       RewritePatternSet patterns(context);
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
index 281c398..9bf27aa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
@@ -289,38 +289,6 @@
       *innerTileSizesOfr, materializeEncodingInfo->outerDimsPerm);
 }
 
-static FailureOr<SmallVector<Value>> lowerUpperBoundTileSizeOpToConstants(
-    RewriterBase &rewriter,
-    IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp,
-    MaterializeEncodingFn materializeEncodingFn) {
-  Location loc = upperBoundTileSizeOp.getLoc();
-  RankedTensorType tensorType = upperBoundTileSizeOp.getTensorType();
-  FailureOr<MaterializeEncodingInfo> materializeEncodingInfo =
-      materializeEncodingFn(tensorType);
-  if (failed(materializeEncodingInfo)) {
-    return rewriter.notifyMatchFailure(upperBoundTileSizeOp,
-                                       "unhandled source encoding");
-  }
-  ArrayRef<int64_t> innerTileSizes = materializeEncodingInfo->innerTileSizes;
-  ArrayRef<int64_t> innerDimsPos = materializeEncodingInfo->innerDimsPos;
-  SmallVector<Value> results(tensorType.getRank());
-  for (unsigned i = 0; i < innerTileSizes.size(); ++i) {
-    int64_t tileSize = innerTileSizes[i];
-    if (ShapedType::isDynamic(tileSize)) {
-      tileSize = 16;
-    }
-    results[innerDimsPos[i]] =
-        rewriter.create<arith::ConstantIndexOp>(loc, tileSize);
-  }
-  // For the dims that have no inner tiles, use 1 as tile size to avoid padding.
-  for (unsigned i = 0; i < results.size(); ++i) {
-    if (!results[i]) {
-      results[i] = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    }
-  }
-  return results;
-}
-
 static FailureOr<Operation *>
 lowerContractionOpWithEncoding(RewriterBase &rewriter,
                                linalg::LinalgOp linalgOp, ValueRange operands,
@@ -788,36 +756,6 @@
   }
 };
 
-/// Convert `upper_bound_tile_size` op to `constant` op. If the
-/// `materializeEncodingFn` returns a failure, the pattern will materialize it
-/// to the same shape.
-struct UpperBoundTileSizeToConstantOpConversion
-    : public OpRewritePattern<IREE::Encoding::UpperBoundTileSizeOp> {
-  UpperBoundTileSizeToConstantOpConversion(
-      MLIRContext *context, MaterializeEncodingFn materializeEncodingFn)
-      : OpRewritePattern<IREE::Encoding::UpperBoundTileSizeOp>(context),
-        materializeEncodingFn(materializeEncodingFn) {}
-
-  LogicalResult
-  matchAndRewrite(IREE::Encoding::UpperBoundTileSizeOp upperBoundTileSizeOp,
-                  PatternRewriter &rewriter) const override {
-
-    auto constants = lowerUpperBoundTileSizeOpToConstants(
-        rewriter, upperBoundTileSizeOp, materializeEncodingFn);
-    if (failed(constants)) {
-      SmallVector<Value> results(upperBoundTileSizeOp.getNumResults(),
-                                 rewriter.create<arith::ConstantIndexOp>(
-                                     upperBoundTileSizeOp.getLoc(), 1));
-      rewriter.replaceOp(upperBoundTileSizeOp, results);
-      return success();
-    }
-    rewriter.replaceOp(upperBoundTileSizeOp, *constants);
-    return success();
-  }
-
-  MaterializeEncodingFn materializeEncodingFn;
-};
-
 /// Generic pattern to convert operation that is in Destination Passing Style.
 template <typename OpTy>
 struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
@@ -959,10 +897,4 @@
       context, typeConverter, materializeEncodingValueFn);
 }
 
-void populateMaterializeUpperBoundTileSizePatterns(
-    RewritePatternSet &patterns, MaterializeEncodingFn materializeEncodingFn) {
-  patterns.insert<UpperBoundTileSizeToConstantOpConversion>(
-      patterns.getContext(), materializeEncodingFn);
-}
-
 } // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td
index 733c67d..6e42b34 100644
--- a/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td
+++ b/compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingOps.td
@@ -54,29 +54,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// upper_bound_tile_size op.
-//===----------------------------------------------------------------------===//
-
-def IREEEncoding_UpperBoundTileSizeOp : IREEEncoding_PureOp<"upper_bound_tile_size",
-    [Pure]> {
-  let summary = "returns an upper bound on tile sizes";
-  let description = [{
-    This returns the largest tile sizes that might result from materialization
-    of the given encoding. This can be used outside of target-specific code, so
-    there may be multiple targets, and this will return the maximum tile size
-    from iterating over all of them. The evaluation happens in the
-    MaterializeUpperBoundTileSize pass.
-  }];
-
-  let arguments = (ins TypeAttrOf<AnyRankedTensor>:$tensorType);
-  let results = (outs Variadic<Index>:$results);
-
-  let assemblyFormat = [{
-    attr-dict $tensorType `->` type($results)
-  }];
-}
-
-//===----------------------------------------------------------------------===//
 // unset_encoding op.
 //===----------------------------------------------------------------------===//
 
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
index 8310b6a..863c939 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
@@ -8,7 +8,6 @@
 
 #include <memory>
 
-#include "iree/compiler/Codegen/Common/CPU/Passes.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "iree/compiler/Dialect/HAL/Target/Devices/LocalDevice.h"
@@ -313,12 +312,6 @@
     buildHALConfigurationPassPipeline(passManager, targetRegistry,
                                       targetOptions, hooks);
 
-    // HACK: this should not be here and will be going away. It exists for
-    // lowering iree_linalg_ext.upper_bound_tile_size ops that exist on the
-    // host. We should be using stream ops for performing such calculations that
-    // we can attach affinities to and understand what devices are being used.
-    passManager.addPass(createCPUMaterializeUpperBoundTileSizePass());
-
     // Preprocess executables using an external tool. The tool may mutate one or
     // more variants and even insert or remove variants.
     for (auto command : clPreprocessExecutablesWith) {
diff --git a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp
index 681b336..863a644 100644
--- a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp
+++ b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp
@@ -336,8 +336,7 @@
   registry.addExtension(
       +[](MLIRContext *context, IREE::Encoding::IREEEncodingDialect *dialect) {
         UnhoistableOpInterfaceHelper<
-            IREE::Encoding::SetEncodingOp,
-            IREE::Encoding::UpperBoundTileSizeOp>::registerOpInterface(context);
+            IREE::Encoding::SetEncodingOp>::registerOpInterface(context);
       });
   // Register hoistable type interfaces for linalg ops.
   // We have a specific allow-list for Linalg ops because we want to consider
diff --git a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
index 30baabc..5c264be 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
@@ -72,7 +72,6 @@
     }
 
     OpPassManager passManager(moduleOp.getOperationName());
-    passManager.addPass(createCPUMaterializeUpperBoundTileSizePass());
     passManager.addPass(createCPUMaterializeHostEncodingPass());
     if (failed(runPipeline(passManager, moduleOp))) {
       return signalPassFailure();