[CPU] Enable masking for single pack/unpack dispatch if supported. (#14632)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 67edd67..3c738b1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -292,7 +292,8 @@
         }
         case IREE::Codegen::DispatchLoweringPassPipeline::CPUDataTiling: {
           TilingConfig tilingConfig = getTilingConfigForPipeline(moduleOp);
-          addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig);
+          addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig,
+                                   enableVectorMasking);
           break;
         }
         case IREE::Codegen::DispatchLoweringPassPipeline::VMVXDefault:
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 20f0c3f..4c34d3f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -632,7 +632,8 @@
 }
 
 void addCPUDataTilingPipeline(OpPassManager &passManager,
-                              TilingConfig &tilingConfig) {
+                              TilingConfig &tilingConfig,
+                              bool enableVectorMasking) {
   addTileAndDistributePasses(passManager);
   OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
   nestedModulePM.addNestedPass<func::FuncOp>(
@@ -643,6 +644,7 @@
   {
     GenericVectorizationPassOptions options;
     options.vectorizePadding = true;
+    options.enableVectorMasking = enableVectorMasking;
     nestedModulePM.addNestedPass<func::FuncOp>(
         createGenericVectorizationPass(options));
     nestedModulePM.addNestedPass<func::FuncOp>(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
index a2854d2..47dad29 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.h
@@ -122,7 +122,8 @@
 
 /// Populates the passes to lower ops through data tiling transformations.
 void addCPUDataTilingPipeline(OpPassManager &passManager,
-                              TilingConfig &tilingConfig);
+                              TilingConfig &tilingConfig,
+                              bool enableVectorMasking);
 
 /// Populates the passes to lower to scalars operations for linalg based
 /// code-generation. This pipeline does not vectorize, but instead just
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
index 3073a84..f65de18 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/data_tiling_pipeline.mlir
@@ -124,4 +124,5 @@
   }
 }
 // CHECK:          func.func @unaligned_pack
+// CHECK-COUNT-16:   vector.maskedload {{.+}} vector<16xf32>
 // CHECK-COUNT-64:   vector.shuffle