[CPU] Remove CPUDoubleTilingPeelingExpert (#17329)
`CPUDoubleTilingPeelingExpert` is effectively `CPUDoubleTilingExpert`
with the addition of loop peeling. This patch removes the former to
improve code re-use. To enable peeling, add the `enable_loop_peeling`
attribute to the pipeline config of the translation info, e.g.:

    #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>

This is a follow-up for https://github.com/iree-org/iree/pull/17231 in
which this new attribute was used to specialise
`CPUConvTileAndDecomposeExpert`.

diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.td
index e73193a..675778c 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.td
@@ -15,18 +15,16 @@
: I32EnumAttrCase<"CPUDefault", 0>;
def CPU_DoubleTilingExpert
: I32EnumAttrCase<"CPUDoubleTilingExpert", 1>;
-def CPU_DoubleTilingPeelingExpert
- : I32EnumAttrCase<"CPUDoubleTilingPeelingExpert", 2>;
def CPU_ConvTileAndDecomposeExpert
- : I32EnumAttrCase<"CPUConvTileAndDecomposeExpert", 3>;
+ : I32EnumAttrCase<"CPUConvTileAndDecomposeExpert", 2>;
def CPU_Mmt4dTilingExpert
- : I32EnumAttrCase<"Mmt4dTilingExpert", 4>;
+ : I32EnumAttrCase<"Mmt4dTilingExpert", 3>;
def CPU_BufferOpsTileAndVectorize
- : I32EnumAttrCase<"CPUBufferOpsTileAndVectorize", 5>;
+ : I32EnumAttrCase<"CPUBufferOpsTileAndVectorize", 4>;
def CPU_DataTiling
- : I32EnumAttrCase<"CPUDataTiling", 6>;
+ : I32EnumAttrCase<"CPUDataTiling", 5>;
def CPU_LinalgExtTileAndVectorize
- : I32EnumAttrCase<"CPULinalgExtTileAndVectorize", 7>;
+ : I32EnumAttrCase<"CPULinalgExtTileAndVectorize", 6>;
def LLVMGPU_Default
: I32EnumAttrCase<"LLVMGPUDefault", 100>;
@@ -84,7 +82,7 @@
"identifier for pass pipeline use to lower dispatch region", [
// CPU CodeGen pipelines
CPU_Default, CPU_DoubleTilingExpert,
- CPU_DoubleTilingPeelingExpert, CPU_ConvTileAndDecomposeExpert,
+ CPU_ConvTileAndDecomposeExpert,
CPU_Mmt4dTilingExpert, CPU_BufferOpsTileAndVectorize,
CPU_DataTiling, CPU_LinalgExtTileAndVectorize,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 8802ec6..fe6e4fb 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -260,6 +260,14 @@
return VectorPreProcStrategy::None;
}
+DictionaryAttr getPipelineConfWithPeelingAttr(MLIRContext *context) { // Builds a pipeline-config dictionary whose only entry is the loop-peeling attribute.
+ auto enableLoopPeelingAttrName = getEnableLoopPeelingAttrName(context); // Key of the entry; the name comes from the shared helper.
+ auto unitAttr = UnitAttr::get(context); // Value of the entry is a unit attribute (tests in this patch print it as `{enable_loop_peeling}`).
+
+ return DictionaryAttr::get( // Single-entry dictionary; callers pass it as the translation-info pipeline config.
+ context, ArrayRef<NamedAttribute>({enableLoopPeelingAttrName, unitAttr}));
+}
+
/// Looks for the `native_vector_size` attribute in the hal.executable.target
/// looked up from this op.
static int64_t
@@ -1063,9 +1071,12 @@
LLVM_DEBUG(KD_DBGS() << "Final tile scalable flags for contraction: "
<< newScalableTileFlags << "\n");
+ DictionaryAttr pipelineConfig =
+ getPipelineConfWithPeelingAttr(op.getContext());
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, op, tileSizes, newScalableTileFlags,
- DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert);
+ DispatchLoweringPassPipeline::CPUDoubleTilingExpert,
+ /*workgroupSize=*/{}, /*subgroupSize=*/{}, pipelineConfig);
}
static LogicalResult
@@ -1151,11 +1162,14 @@
<< newScalableTileFlags << "\n");
auto pipeline = DispatchLoweringPassPipeline::CPUDoubleTilingExpert;
+ DictionaryAttr pipelineConfig;
if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
- pipeline = DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert;
+ pipelineConfig = getPipelineConfWithPeelingAttr(op.getContext());
}
- return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, newTileSizes,
- newScalableTileFlags, pipeline);
+
+ return setOpConfigAndEntryPointFnTranslation(
+ entryPointFn, op, newTileSizes, newScalableTileFlags, pipeline,
+ /*workgroupSize=*/{}, /*subgroupSize=*/{}, pipelineConfig);
}
/// Returns default hard-coded vector sizes for a give target. No smartness
@@ -1843,17 +1857,19 @@
// For non-tensor based ops use the Buffer ops pipeline.
DispatchLoweringPassPipeline passPipeline;
+ DictionaryAttr pipelineConfig;
if (genericOp.hasPureTensorSemantics()) {
- passPipeline =
- vecPreProcStrategy == VectorPreProcStrategy::Peeling
- ? DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert
- : DispatchLoweringPassPipeline::CPUDoubleTilingExpert;
+ passPipeline = DispatchLoweringPassPipeline::CPUDoubleTilingExpert;
+ if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
+ pipelineConfig = getPipelineConfWithPeelingAttr(genericOp.getContext());
+ }
} else {
passPipeline = DispatchLoweringPassPipeline::CPUBufferOpsTileAndVectorize;
}
- return setOpConfigAndEntryPointFnTranslation(entryPointFn, genericOp,
- tileSizes, passPipeline);
+ return setOpConfigAndEntryPointFnTranslation(
+ entryPointFn, genericOp, tileSizes, passPipeline, /*workgroupSize=*/{},
+ /*subgroupSize=*/{}, pipelineConfig);
}
/// Set lowering info to be used by the transform dialect jitter.
@@ -2034,16 +2050,20 @@
<< "\n");
DispatchLoweringPassPipeline passPipeline;
+ DictionaryAttr pipelineConfig;
if (genericOp.hasPureBufferSemantics()) {
passPipeline = DispatchLoweringPassPipeline::CPUBufferOpsTileAndVectorize;
- } else if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
- passPipeline = DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert;
} else {
passPipeline = DispatchLoweringPassPipeline::CPUDoubleTilingExpert;
}
- return setOpConfigAndEntryPointFnTranslation(entryPointFn, genericOp,
- tileSizes, passPipeline);
+ if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
+ pipelineConfig = getPipelineConfWithPeelingAttr(genericOp.getContext());
+ }
+
+ return setOpConfigAndEntryPointFnTranslation(
+ entryPointFn, genericOp, tileSizes, passPipeline, /*workgroupSize=*/{},
+ /*subgroupSize=*/{}, pipelineConfig);
}
/// Sets the lowering configuration for a generic op to use
@@ -2180,14 +2200,7 @@
DictionaryAttr pipelineConfig;
if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
- // Enable peeling. To this end, attach extra info to the pipeline config.
- // This will later be extracted by LLVMCPULowerExecutableTargetPass.
- auto context = convOp.getContext();
- auto enableLoopPeelingAttrName = getEnableLoopPeelingAttrName(context);
- auto unitAttr = UnitAttr::get(context);
- pipelineConfig = DictionaryAttr::get(
- context,
- ArrayRef<NamedAttribute>({enableLoopPeelingAttrName, unitAttr}));
+ pipelineConfig = getPipelineConfWithPeelingAttr(convOp.getContext());
}
return setOpConfigAndEntryPointFnTranslation(
@@ -2477,16 +2490,31 @@
}
}
- auto pipeline = getTranslationInfo(entryPointFn).getPassPipeline().getValue();
- if (pipeline == DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert) {
+ auto tInfo = getTranslationInfo(entryPointFn);
+ auto pipeline = tInfo.getPassPipeline().getValue();
+ auto pipelineConfig = tInfo.getConfiguration();
+ if (isLoopPeelingEnabled(entryPointFn)) {
+ // See #16406
LLVM_DEBUG(KD_DBGS() << "unpack fusion does not work with peeling, falling "
"back to non-peeling path");
pipeline = DispatchLoweringPassPipeline::CPUDoubleTilingExpert;
+
+ // Remove the "enable_loop_peeling" attr from pipelineConfig
+ auto enableLoopPeelingAttrName =
+ getEnableLoopPeelingAttrName(rootOp->getContext());
+ auto newPipelineConfigEntries = llvm::to_vector(llvm::make_filter_range(
+ pipelineConfig.getValue(), [&](NamedAttribute entry) {
+ return entry.getName() != enableLoopPeelingAttrName;
+ }));
+
+ pipelineConfig =
+ DictionaryAttr::get(rootOp->getContext(), newPipelineConfigEntries);
}
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, rootOp, tileSizesList,
- loweringConfig.getScalableTileFlagVals(), pipeline);
+ loweringConfig.getScalableTileFlagVals(), pipeline, /*workgroupSize=*/{},
+ /*subgroupSize=*/{}, pipelineConfig);
}
/// Get tile sizes for the generic op and fill into the parallel vector tile
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index fc16f57..732d043 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -105,6 +105,7 @@
pipelineOpts.enableAArch64SSVE =
isAArch64(target) && hasAnySVEFeature(target) && hasSMEFeature(target);
pipelineOpts.enableAArch64I8mm = isAArch64(target) && hasI8mmFeature(target);
+ pipelineOpts.enablePeeling = isLoopPeelingEnabled(funcOp);
IREE::Codegen::TranslationInfoAttr translationInfo =
getTranslationInfo(funcOp);
@@ -132,16 +133,8 @@
break;
}
case IREE::Codegen::DispatchLoweringPassPipeline::
- CPUDoubleTilingPeelingExpert: {
- TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
- pipelineOpts.enablePeeling = true;
- addMultiTilingExpertPassPipeline(pipeline, tilingConfig, pipelineOpts);
- break;
- }
- case IREE::Codegen::DispatchLoweringPassPipeline::
CPUConvTileAndDecomposeExpert: {
TilingConfig tilingConfig = getTilingConfigForPipeline(funcOp);
- pipelineOpts.enablePeeling = isLoopPeelingEnabled(funcOp);
addConvTileAndDecomposeExpertPassPipeline(pipeline, tilingConfig,
pipelineOpts);
break;
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
index ee22e3a..6071b91 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-llvmcpu-lower-executable-target))' -split-input-file %s | FileCheck %s
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-#translation = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
module {
func.func @no_peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} {
@@ -29,7 +29,7 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[65, 65, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-#translation = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
module {
func.func @peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} {
@@ -69,7 +69,7 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-#translation = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
module {
func.func @peel_dynamic_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} {
@@ -123,7 +123,7 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0], [8, [32], 0], [0, 0, 1], [0, 0, 0]]>
-#translation = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
module {
func.func @peel_scalable_matmul() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_, translation_info = #translation} {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_ssve_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_ssve_tests.mlir
index ffa4861..43d4c6a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_ssve_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_ssve_tests.mlir
@@ -3,7 +3,6 @@
// Check Armv9 Streaming SVE mode is enabled for the following pipelines:
//
// * CPUBufferOpsTileAndVectorize
-// * CPUDoubleTilingPeelingExpert
// * CPUConvTileAndDecomposeExpert
// * CPUDoubleTilingExpert
@@ -31,27 +30,6 @@
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
module {
func.func @dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_,
- translation_info = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>} {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.constant.load[0] : i32
- %1 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xf32>>
- %2 = tensor.empty() : tensor<1xf32>
- %3 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>}
- ins(%cst : f32) outs(%2 : tensor<1xf32>) -> tensor<1xf32>
- flow.dispatch.tensor.store %3, %1, offsets = [0], sizes = [1], strides = [1] : tensor<1xf32> -> !flow.dispatch.tensor<readwrite:tensor<1xf32>>
- return
- }
-}
-
-// CHECK: func.func @dispatch()
-// CHECK-SAME: arm_locally_streaming
-
-// -----
-#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
-module {
- func.func @dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_,
translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
index 79c7bcf..f1b5c7e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
@@ -21,7 +21,7 @@
}
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 16, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_tensors_default()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -51,7 +51,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @i4_i4_i32_matmul()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -80,7 +80,7 @@
}
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 64, 64, 0], [1, 64, 64, 0], [0, 0, 0, 0], [1, 8, 16, 0], [0, 0, 0, 1], [0, 0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @batch_matmul_tensors()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.batch_matmul
@@ -105,7 +105,7 @@
}
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[28, 20, 0], [28, 20, 0], [0, 0, 0], [8, 16, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -183,7 +183,7 @@
}
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 16, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_aarch_i8_i8_i32_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -210,7 +210,7 @@
}
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 16, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_aarch_i8_i8_i32_dynamic()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
index ab05ae0..3b06aa3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
@@ -24,7 +24,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, [16], 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_tensors()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -48,7 +48,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[5, 7, 0], [5, 7, 0], [0, 0, 0], [8, [16], 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @static_tensors_non_pow_two_sizes()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -73,7 +73,7 @@
// TODO: FIXME - scalable "16" ([16]) for just 1 element
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], [0, 0, 0], [0, 0, 0], [1, [16], 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @static_tensors_1x1()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
index 69792af..834647e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
@@ -20,7 +20,7 @@
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [8, 32], [0, 0], [0, 0]]>
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], [0, 0, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
index 9157ed2..6578023 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
@@ -19,7 +19,7 @@
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [8, 32], [0, 0], [0, 0]]>
// CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 1], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_riscv()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index 5241b02..4527d8e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -19,7 +19,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 0], [64, 0], [0, 0], [32, 0], [0, 16], [0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matvec_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matvec
@@ -54,7 +54,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 0], [64, 0], [0, 0], [32, 0], [0, 16], [0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matvec_dynamic()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matvec
@@ -81,7 +81,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0], [0], [0], [0], [16], [0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @dot_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.dot
@@ -112,7 +112,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0], [0], [0], [0], [16], [0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @dot_dynamic()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.dot
@@ -214,7 +214,7 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [64, 64, 0], [0, 0, 0], [32, 32, 0], [0, 0, 32], [0, 0, 0]]>
-#translation = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
module {
func.func @preset_config_matmul_tensors() attributes {
@@ -236,7 +236,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [32, 32, 0], [0, 0, 32], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @preset_config_matmul_tensors()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -262,7 +262,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[43, 8, 0], [43, 8, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_partially_peel()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -604,7 +604,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -666,7 +666,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_i8_i8_i32_static()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -693,7 +693,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 0, 0], [64, 0, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @gemm_unit_N()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -719,7 +719,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @gemm_unit_M_unit_N()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -748,7 +748,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[11, 49, 0], [11, 49, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @matmul_odd()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul
@@ -1352,7 +1352,7 @@
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 48, 0], [64, 48, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
-// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingPeelingExpert>
+// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
// CHECK: func.func @quant_model()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: linalg.matmul