Revert "[LLVMCPU][ArmSME] Add `2d-scalable-to-1d-scalable` pass" (#16963)
Reverts openxla/iree#16712
Some builds that were not included in pre-commit are failing; let's
revert until we can take a closer look.
(see also https://github.com/openxla/iree/pull/16961)
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp
index b01d646..d5f12e4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp
@@ -52,106 +52,46 @@
}
};
-/// Returns the tiling level that contains the vector dim at `dimPos` (which is
-/// an index into the result of `getVectorTileSizes()`).
-unsigned TilingConfig::getTilingLevelForVectorDimPosition(unsigned dimPos) {
- constexpr std::array vectorTilingLevels{VectorCommonParallelTiles,
- VectorReductionTiles,
- VectorInnerParallelTiles};
- ArrayRef<TilingLevel> possibleLevels = vectorTilingLevels;
- if (!hasVectorInnerParallelLevel())
- possibleLevels = possibleLevels.drop_back();
- std::optional<unsigned> foundLevel;
- auto tilingLevels = loweringConfig.getTilingLevels();
- for (TilingLevel level : possibleLevels) {
- auto tilingLevelIndex = getActualLevel(level);
- if (tilingLevels[tilingLevelIndex].getSizes()[dimPos] != 0) {
- assert(!foundLevel.has_value() &&
- "expected at most one tile size to be non-zero");
- foundLevel = tilingLevelIndex;
- }
- }
- assert(foundLevel.has_value() && "no vector size found for `dimPos`");
- return *foundLevel;
-}
-
-/// Returns the tile size (size + scalability pair) at `index`. The
-/// `scalableFlags` can be empty.
-static std::pair<int64_t, bool> getTileSizeAtIndex(ArrayRef<int64_t> sizes,
- ArrayRef<bool> scalableFlags,
- unsigned index) {
- return std::make_pair(sizes[index],
- index < scalableFlags.size() && scalableFlags[index]);
-}
-
/// Returns the tile sizes of all the vector dimensions, including parallel
/// and reduction dimensions.
SizesAndScalableFlags TilingConfig::getVectorTileSizes() {
unsigned numDims = getNumDimensions();
SmallVector<int64_t> vectorSizes(numDims, 0);
SmallVector<bool> scalableFlags(numDims, false);
- auto tilingLevels = loweringConfig.getTilingLevels();
- for (int dimPos = 0; dimPos < numDims; ++dimPos) {
- unsigned dimTilingLevel = getTilingLevelForVectorDimPosition(dimPos);
- std::tie(vectorSizes[dimPos], scalableFlags[dimPos]) = getTileSizeAtIndex(
- tilingLevels[dimTilingLevel].getSizes(),
- tilingLevels[dimTilingLevel].getScalableFlags(), dimPos);
+ auto [parallelCommonSizes, parallelCommonScalableFlags] =
+ getVectorCommonParallelSizes();
+ auto [reductionSizes, reductionScalableFlags] = getVectorReductionSizes();
+ SizesAndScalableFlags parallelInnerTiles;
+ if (hasVectorInnerParallelLevel()) {
+ parallelInnerTiles = getVectorInnerParallelSizes();
}
+
+ for (int i = 0; i < numDims; ++i) {
+ SmallVector<bool> dimSizes;
+ dimSizes.push_back(!!parallelCommonSizes[i] ||
+ parallelCommonScalableFlags[i]);
+ dimSizes.push_back(!!reductionSizes[i] || reductionScalableFlags[i]);
+ if (hasVectorInnerParallelLevel())
+ dimSizes.push_back(!!parallelInnerTiles.first[i] ||
+ parallelInnerTiles.second[i]);
+
+ unsigned nonZeroCnt = llvm::count(dimSizes, true);
+ assert(nonZeroCnt <= 1 && "expected one tile size at most to be non-zero");
+ (void)nonZeroCnt;
+
+ vectorSizes[i] = parallelCommonSizes[i] ^ reductionSizes[i];
+ if (hasVectorInnerParallelLevel())
+ vectorSizes[i] ^= parallelInnerTiles.first[i];
+
+ scalableFlags[i] =
+ parallelCommonScalableFlags[i] || reductionScalableFlags[i];
+ if (hasVectorInnerParallelLevel())
+ scalableFlags[i] |= parallelInnerTiles.second[i];
+ }
+
return std::make_pair(vectorSizes, scalableFlags);
}
-/// Returns a new `LoweringConfigAttr`, with the tile sizes of vector
-/// dimensions, set to `sizes`, and the corresponding scalability set to
-/// `scalableFlags`.
-IREE::Codegen::LoweringConfigAttr
-TilingConfig::getLoweringConfigWithNewVectorSizes(
- ArrayRef<int64_t> sizes, ArrayRef<bool> scalableFlags) {
- unsigned numDims = getNumDimensions();
- assert(sizes.size() == numDims &&
- "expected `sizes` to match number of dimensions");
- assert((scalableFlags.empty() || scalableFlags.size() == numDims) &&
- "expected `scalableFlags` to match "
- "number of dimensions (or be empty)");
-
- // Make a map from tiling levels to vector dims at that level.
- std::array<SmallVector<unsigned, 4>, MaxNumTileLevels> tilingLevelToDimsMap;
- for (unsigned dimPos = 0; dimPos < numDims; ++dimPos) {
- auto tilingLevelIndex = getTilingLevelForVectorDimPosition(dimPos);
- tilingLevelToDimsMap[tilingLevelIndex].push_back(dimPos);
- }
-
- MLIRContext *context = loweringConfig.getContext();
- auto tilingLevels = loweringConfig.getTilingLevels();
- SmallVector<IREE::Codegen::LoweringConfigTilingLevelAttr> newTilingLevelsList(
- tilingLevels.begin(), tilingLevels.end());
-
- // For each vector tiling level:
- for (auto [tilingLevelIndex, tilingLevelDims] :
- llvm::enumerate(tilingLevelToDimsMap)) {
- if (tilingLevelDims.empty())
- continue;
- auto level = tilingLevels[tilingLevelIndex];
- SmallVector<int64_t> newSizes(level.getSizes());
- SmallVector<bool> newScalableFlags(level.getScalableFlags());
- newScalableFlags.resize(numDims);
- // 1. Update all the vector sizes within that tiling level.
- for (unsigned dimPos : tilingLevelDims) {
- std::tie(newSizes[dimPos], newScalableFlags[dimPos]) =
- getTileSizeAtIndex(sizes, scalableFlags, dimPos);
- }
- // 2. Then create a new tiling level attribute for that level.
- auto newLevel = IREE::Codegen::LoweringConfigTilingLevelAttr::get(
- context, newSizes, level.getInterchange(), newScalableFlags);
- newTilingLevelsList[tilingLevelIndex] = newLevel;
- }
-
- // Create a new `lowering_config` attribute.
- auto newTilingLevels = IREE::Codegen::LoweringConfigTilingLevelsAttr::get(
- context, newTilingLevelsList);
- return IREE::Codegen::LoweringConfigAttr::get(
- context, newTilingLevels, loweringConfig.getNativeVectorSize());
-}
-
/// Returns a list with the tiling levels that can be fused for this
/// configuration.
SmallVector<int64_t> TilingConfig::getFusableLevels() {
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h
index 5ac2fdf..ef2a138 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h
+++ b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h
@@ -109,21 +109,10 @@
return getVectorSizesForLevel(getVectorInnerParallelLevel());
}
- /// Returns the tiling level that contains the vector dim at `dimPos` (which
- /// is an index into the result of `getVectorTileSizes()`).
- unsigned getTilingLevelForVectorDimPosition(unsigned dimPos);
-
/// Returns the tile sizes of all the vector dimensions, including parallel
/// and reduction dimensions.
SizesAndScalableFlags getVectorTileSizes();
- /// Returns a new `LoweringConfigAttr`, with the tile sizes of vector
- /// dimensions, set to `sizes`, and the corresponding scalability set to
- /// `scalableFlags`.
- IREE::Codegen::LoweringConfigAttr
- getLoweringConfigWithNewVectorSizes(ArrayRef<int64_t> sizes,
- ArrayRef<bool> scalableFlags);
-
/// Returns a list with the tiling levels that can be fused for this
/// configuration.
SmallVector<int64_t> getFusableLevels();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
index 62f1208..d030294 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
@@ -52,7 +52,6 @@
"DispatchABI.cpp",
"ExpandF16OpToF32Pass.cpp",
"KernelDispatch.cpp",
- "LLVMCPU2DScalableTo1DScalable.cpp",
"LLVMCPUAssignConstantOrdinals.cpp",
"LLVMCPUAssignImportOrdinals.cpp",
"LLVMCPUCheckIRBeforeLLVMConversion.cpp",
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
index abbf5c0..f794896 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
@@ -53,7 +53,6 @@
"DispatchABI.cpp"
"ExpandF16OpToF32Pass.cpp"
"KernelDispatch.cpp"
- "LLVMCPU2DScalableTo1DScalable.cpp"
"LLVMCPUAssignConstantOrdinals.cpp"
"LLVMCPUAssignImportOrdinals.cpp"
"LLVMCPUCheckIRBeforeLLVMConversion.cpp"
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPU2DScalableTo1DScalable.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPU2DScalableTo1DScalable.cpp
deleted file mode 100644
index 7ae37a4..0000000
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPU2DScalableTo1DScalable.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright 2024 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "iree/compiler/Codegen/Common/TileSizeSelection.h"
-#include "iree/compiler/Codegen/LLVMCPU/PassDetail.h"
-#include "iree/compiler/Codegen/LLVMCPU/Passes.h"
-#include "iree/compiler/Codegen/LLVMCPU/Utils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
-#include "mlir/Dialect/SCF/Utils/Utils.h"
-#include "mlir/Pass/Pass.h"
-
-namespace mlir::iree_compiler {
-
-namespace {
-
-/// Currently, IREE requires `lowering_config`s to be propagated to all compute
-/// ops within a dispatch region. This can be problematic for SME which only
-/// supports 2D scalable outer product operations -- if an operation cannot be
-/// lowered to an outer product, we can only scalably vectorize it in one
-/// dimension.
-///
-/// The solution here is this pass (`2d-scalable-to-1d-scalable`) that runs just
-/// before vectorization, that drops unsupported scalable tile/vector sizes,
-/// producing loops of ops that will only be vectorized scalably in one
-/// dimension. This allows earlier passes like `tile-and-fuse` to still function
-/// correctly.
-///
-/// Take this simple example:
-///
-/// ```mlir
-/// // Lowering configs propagated (from matmul):
-/// linalg.fill {lowering_config = [[4], [4]]
-/// linalg.matmul {lowering_config = [[4], [4], 1]
-/// linalg.generic {lowering_config = [[4], [4]]
-/// ```
-/// Here the `linalg.generic` cannot be vectorized with 2D scalable vectors.
-///
-/// After `tile-and-fuse` (which requires consistent lowering configs):
-/// ```mlir
-/// scf.for i in range(0, 1000) step 4 x vscale {
-/// scf.for j in range(0, 2000) step 4 x vscale {
-/// linalg.fill {lowering_config = [[4], [4]]
-/// for k in range(0, 100) step 1 {
-/// linalg.matmul {lowering_config = [[4], [4], 1]
-/// }
-/// // 2D scalable vectorization unsupported here:
-/// linalg.generic {lowering_config = [[4], [4]]
-/// }
-/// }
-/// ```
-///
-/// Unsupported scalability removed (by `2d-scalable-to-1d-scalable`):
-/// ```mlir
-/// scf.for i in range(0, 1000) step 4 x vscale {
-/// scf.for j in range(0, 2000) step 4 x vscale {
-/// linalg.fill {lowering_config = [[4], [4]]
-/// for k in range(0, 100) step 1 {
-/// linalg.matmul {lowering_config = [[4], [4], 1]
-/// }
-/// // Insert a new loop:
-/// for n in range(0, 4 x vscale) step 4 {
-/// // Drop a scalable dim:
-/// linalg.generic {lowering_config = [4, [4]]
-/// }
-/// }
-/// }
-/// ```
-///
-/// This can now be vectorized and lowered successfully, which produces a
-/// dispatch that mixes SME and SVE.
-class LLVMCPU2DScalableTo1DScalablePass
- : public LLVMCPU2DScalableTo1DScalableBase<
- LLVMCPU2DScalableTo1DScalablePass> {
-public:
- using LLVMCPU2DScalableTo1DScalableBase::LLVMCPU2DScalableTo1DScalableBase;
-
- void getDependentDialects(DialectRegistry ®istry) const override {
- registry
- .insert<arith::ArithDialect, linalg::LinalgDialect, scf::SCFDialect>();
- }
-
- void runOnOperation() override;
-};
-
-static bool opKnownToSupport2DScalableVectorizationWithArmSME(Operation *op) {
- return isa<linalg::MatmulOp, linalg::MatmulTransposeAOp, linalg::FillOp>(op);
-}
-
-// Note: It would be easy to parameterize this rewrite to convert N-D scalable
-// operations to M-D scalable ones (where M < N). However this is currently not
-// needed.
-static LogicalResult
-dropScalabilityFromUnsupportedOperations(mlir::FunctionOpInterface funcOp,
- bool assumeArmSME = false) {
- // Note: Which operations should have scalability dropped is specific to
- // ArmSME. The rest of this rewrite could be generic (though currently
- // there's no other targets that support > 1D scalability).
- auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp);
- bool isArmSME = assumeArmSME || hasSMEFeature(targetAttr);
-
- if (!isArmSME)
- return success();
-
- SmallVector<TilingInterface> computeOps;
- funcOp.walk([&](TilingInterface op) {
- if (!opKnownToSupport2DScalableVectorizationWithArmSME(op))
- computeOps.push_back(op);
- });
-
- for (TilingInterface tilingOp : computeOps) {
- auto loweringConfigAttr = getLoweringConfig(tilingOp);
- if (!loweringConfigAttr)
- continue;
-
- TilingConfig tilingConfig(loweringConfigAttr);
- auto [vectorSizes, scalableFlags] = tilingConfig.getVectorTileSizes();
- auto numScalableDims = llvm::count(scalableFlags, true);
-
- if (numScalableDims <= 1)
- continue;
-
- SmallVector<int64_t> loopTileSizes;
- SmallVector<bool> newScalableFlags;
- for (auto [flag, size] : llvm::zip_equal(scalableFlags, vectorSizes)) {
- if (flag && numScalableDims >= 2) {
- --numScalableDims;
- loopTileSizes.push_back(size);
- newScalableFlags.push_back(false);
- } else {
- loopTileSizes.push_back(0);
- newScalableFlags.push_back(flag);
- }
- }
-
- IRRewriter rewriter(tilingOp->getContext());
- rewriter.setInsertionPoint(tilingOp);
-
- // 2. Re-tile the operation with some scalability dropped. This introduces
- // loops for previously scalable vector/tile sizes.
- scf::SCFTilingOptions options;
- setSCFTileSizes(options, tilingOp, loopTileSizes, /*tileScalableFlags=*/{});
- auto tilingResult = scf::tileUsingSCF(rewriter, tilingOp, options);
- if (failed(tilingResult))
- return failure();
-
- // 3. Update the lowering config of the new tiled operations.
- auto newLoweringConfig = tilingConfig.getLoweringConfigWithNewVectorSizes(
- vectorSizes, newScalableFlags);
- for (auto *newOp : tilingResult->tiledOps) {
- if (isa<TilingInterface>(newOp))
- setLoweringConfig(newOp, newLoweringConfig);
- }
-
- rewriter.replaceOp(tilingOp, tilingResult->replacements);
- }
- return success();
-}
-
-void LLVMCPU2DScalableTo1DScalablePass::runOnOperation() {
- if (failed(dropScalabilityFromUnsupportedOperations(getOperation(),
- assumeArmSME)))
- signalPassFailure();
-}
-
-} // namespace
-
-std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
-createLLVMCPU2DScalableTo1DScalablePass() {
- return std::make_unique<LLVMCPU2DScalableTo1DScalablePass>();
-}
-
-} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 2701f51..a577d47 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -397,11 +397,6 @@
nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUPeelPass());
}
- if (pipelineOpt.enableAArch64SSVE) {
- nestedModulePM.addNestedPass<func::FuncOp>(
- createLLVMCPU2DScalableTo1DScalablePass());
- }
-
{
nestedModulePM.addNestedPass<func::FuncOp>(createVectorizePadPass());
nestedModulePM.addNestedPass<func::FuncOp>(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
index b7aa798..22714cf 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
@@ -67,9 +67,6 @@
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createLLVMCPUTileAndFusePass(int64_t tilingLevel = -1);
-std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
-createLLVMCPU2DScalableTo1DScalablePass();
-
/// Pass to tile TilingInterface ops with given tilingLevel.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createLLVMCPUTilePass(int64_t tilingLevel = -1);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
index 7969416..c4f5884 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.td
@@ -139,19 +139,6 @@
];
}
-// Note: This pass is currently only required when targeting Arm SME (which is
-// the only target that currently has some concept of 2D scalability).
-def LLVMCPU2DScalableTo1DScalable :
- InterfacePass<"iree-llvmcpu-2d-scalable-to-1d-scalable", "mlir::FunctionOpInterface"> {
- let summary = "Pass to replace unsupported scalable dimensions with loops.";
- let constructor =
- "mlir::iree_compiler::createLLVMCPU2DScalableTo1DScalablePass()";
- let options = [
- Option<"assumeArmSME", "assume-arm-sme", "bool", /*default=*/"false",
- "Assume the current target is ArmSME (used for testing)">
- ];
-}
-
def LLVMCPUUnfuseFMAOps :
InterfacePass<"iree-llvmcpu-unfuse-fma-pass", "mlir::FunctionOpInterface"> {
let summary = "Convert llvm.fma into unfused mulf and addf ops";
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/2d-scalable-to-1d-scalable.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/2d-scalable-to-1d-scalable.mlir
deleted file mode 100644
index cbf25db..0000000
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/2d-scalable-to-1d-scalable.mlir
+++ /dev/null
@@ -1,88 +0,0 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-2d-scalable-to-1d-scalable{assume-arm-sme=true},cse))" --split-input-file %s | FileCheck %s
-
-#compute_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [[4], [4]], [0, 0], [0, 0]]>
-#matmul_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0], [[4], [4], 0], [0, 0, 1], [0, 0, 0]]>
-#dim_0_map = affine_map<(d0)[s0] -> (-d0 + 32400, s0)>
-#dim_1_map = affine_map<(d0)[s0] -> (-d0 + 16, s0)>
-
-// Here's an example from a dispatch where a matmul has been given a 2D-scalable
-// lowering config (#matmul_config) for ArmSME. That config has been propagated
-// to compute ops within that same dispatch as (#compute_config).
-//
-// This is okay for the linalg.fill but the linalg.generic cannot be lowered
-// to make use of 2D scalable vectors. ArmSME only supports 2D scalable outer
-// products, so if it's not an outer product, we can only scalably vectorize in
-// one dimension.
-//
-// The initial tile-and-fuse pass requires lowering configs to be consistent,
-// so we keep the keep the lowering_configs unchanged until after that pass.
-//
-// 2d-scalable-to-1d-scalable can then remove unsupported scalable
-// dimensions, and introduce loops. This results in dispatches that fuse both
-// SME and SVE.
-
-// Extracted from an IR dump after iree-llvmcpu-tile-and-fuse:
-func.func @scalable_2d_matmul_and_generic(%arg0: tensor<32400x32xf32>, %arg1: tensor<32x16xf32>, %arg2: tensor<32400x16xf32>, %arg3: tensor<16xf32>) -> tensor<32400x16xf32> {
- %c0 = arith.constant 0 : index
- %c4 = arith.constant 4 : index
- %c16 = arith.constant 16 : index
- %c32400 = arith.constant 32400 : index
- %cst = arith.constant 0.000000e+00 : f32
- %0 = vector.vscale
- %1 = arith.muli %0, %c4 : index
- %2 = scf.for %arg4 = %c0 to %c32400 step %1 iter_args(%arg5 = %arg2) -> (tensor<32400x16xf32>) {
- %3 = scf.for %arg6 = %c0 to %c16 step %1 iter_args(%arg7 = %arg5) -> (tensor<32400x16xf32>) {
- %4 = affine.min #dim_0_map(%arg4)[%1]
- %5 = affine.min #dim_1_map(%arg6)[%1]
- %extracted_slice = tensor.extract_slice %arg0[%arg4, 0] [%4, 32] [1, 1] : tensor<32400x32xf32> to tensor<?x32xf32>
- %extracted_slice_0 = tensor.extract_slice %arg1[0, %arg6] [32, %5] [1, 1] : tensor<32x16xf32> to tensor<32x?xf32>
- %6 = tensor.empty(%4, %5) : tensor<?x?xf32>
- %7 = linalg.fill {lowering_config = #compute_config}
- ins(%cst : f32) outs(%6 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %8 = linalg.matmul {lowering_config = #matmul_config}
- ins(%extracted_slice, %extracted_slice_0 : tensor<?x32xf32>, tensor<32x?xf32>)
- outs(%7 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %extracted_slice_1 = tensor.extract_slice %arg3[%arg6] [%5] [1] : tensor<16xf32> to tensor<?xf32>
- %extracted_slice_2 = tensor.extract_slice %arg7[%arg4, %arg6] [%4, %5] [1, 1] : tensor<32400x16xf32> to tensor<?x?xf32>
- %9 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%8, %extracted_slice_1 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%extracted_slice_2 : tensor<?x?xf32>) attrs = {lowering_config = #compute_config} {
- ^bb0(%in: f32, %in_3: f32, %out: f32):
- %10 = arith.mulf %in, %in_3 : f32
- linalg.yield %10 : f32
- } -> tensor<?x?xf32>
- %inserted_slice = tensor.insert_slice %9 into %arg7[%arg4, %arg6] [%4, %5] [1, 1] : tensor<?x?xf32> into tensor<32400x16xf32>
- scf.yield %inserted_slice : tensor<32400x16xf32>
- }
- scf.yield %3 : tensor<32400x16xf32>
- }
- return %2 : tensor<32400x16xf32>
-}
-// CHECK: #[[FILL_CONFIG:.*]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], {{\[}}[4], [4]], [0, 0], [0, 0]]>
-// CHECK: #[[MATMUL_CONFIG:.*]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], {{\[}}[4], [4], 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK: #[[GENERIC_CONFIG:.*]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [4, [4]], [0, 0], [0, 0]]>
-//
-// CHECK: func.func @scalable_2d_matmul_and_generi
-// CHECK: %[[C4:.*]] = arith.constant 4 : index
-// CHECK: %[[VSCALE:.*]] = vector.vscale
-// CHECK: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
-// CHECK: scf.for
-// CHECK-SAME: step %[[C4_VSCALE]]
-// CHECK-SAME: {
-// CHECK: scf.for
-// CHECK-SAME: step %[[C4_VSCALE]]
-// CHECK-SAME: {
-// CHECK: linalg.fill
-// CHECK-SAME: lowering_config = #[[FILL_CONFIG]]
-// CHECK: linalg.matmul
-// CHECK-SAME: lowering_config = #[[MATMUL_CONFIG]]
-// CHECK: scf.for
-// CHECK-SAME: step %[[C4]]
-// CHECK-SAME: {
-// CHECK: linalg.generic
-// CHECK-SAME: lowering_config = #[[GENERIC_CONFIG]]
-// CHECK: }
-// CHECK: }
-// CHECK: }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
index 6a2c1d8..c0fae3f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/BUILD.bazel
@@ -19,7 +19,6 @@
srcs = enforce_glob(
# keep sorted
[
- "2d-scalable-to-1d-scalable.mlir",
"aarch64_dotprod_vector_lowering.mlir",
"aarch64_vector_lowering.mlir",
"apply_scale_lowering.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
index 94e9be2..ad2ee35 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
@@ -14,7 +14,6 @@
NAME
lit
SRCS
- "2d-scalable-to-1d-scalable.mlir"
"aarch64_dotprod_vector_lowering.mlir"
"aarch64_vector_lowering.mlir"
"apply_scale_lowering.mlir"