[CPU] Enable masking for single pack/unpack dispatch if supported. (#14632)
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 67edd67..3c738b1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -292,7 +292,8 @@
}
case IREE::Codegen::DispatchLoweringPassPipeline::CPUDataTiling: {
TilingConfig tilingConfig = getTilingConfigForPipeline(moduleOp);
- addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig);
+ addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig,
+ enableVectorMasking);
break;
}
case IREE::Codegen::DispatchLoweringPassPipeline::VMVXDefault:
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 20f0c3f..4c34d3f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -632,7 +632,8 @@
}
void addCPUDataTilingPipeline(OpPassManager &passManager,
- TilingConfig &tilingConfig) {
+ TilingConfig &tilingConfig,
+ bool enableVectorMasking) {
addTileAndDistributePasses(passManager);
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
nestedModulePM.addNestedPass<func::FuncOp>(
@@ -643,6 +644,7 @@
{
GenericVectorizationPassOptions options;
options.vectorizePadding = true;
+ options.enableVectorMasking = enableVectorMasking;
nestedModulePM.addNestedPass<func::FuncOp>(
createGenericVectorizationPass(options));
nestedModulePM.addNestedPass<func::FuncOp>(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
index a2854d2..47dad29 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
@@ -122,7 +122,8 @@
/// Populates the passes to lower ops through data tiling transformations.
void addCPUDataTilingPipeline(OpPassManager &passManager,
- TilingConfig &tilingConfig);
+ TilingConfig &tilingConfig,
+ bool enableVectorMasking);
/// Populates the passes to lower to scalars operations for linalg based
/// code-generation. This pipeline does not vectorize, but instead just
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
index 3073a84..f65de18 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
@@ -124,4 +124,5 @@
}
}
// CHECK: func.func @unaligned_pack
+// CHECK-COUNT-16: vector.maskedload {{.+}} vector<16xf32>
// CHECK-COUNT-64: vector.shuffle