[CPU] Fix tile sizes adjustment for pack op with outer_dims_perm. (#14441)

The tile sizes of distribution and TileAndFuse are set based on rootOp.
They should be transposed if outer_dims_perm exists. Otherwise, they
won't be applied to the corresponding loops. E.g., say that the expected
tile sizes for the root op are `[8, 128]` and the pack op consumer has
outer_dims_perm. If they are not transposed, the snippet below would be
generated and a `128x8` slice of rootOp would be created.

```
%producer = linalg.generic
scf.for ... step 8
  scf.for ... step 128
    %slice = tensor.extract_slice %producer [%i, %j] [128, 8] [1, 1]
    %pack = tensor.pack %slice outer_dims_perm = [1, 0] ...
```

What we want is to create an `8x128` slice. So we have to transpose the
tile sizes if outer_dims_perm is present.

Fixes https://github.com/openxla/iree/issues/14252
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index a13a7fb..215af71 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -1965,6 +1965,7 @@
 
     // Only adjust tile sizes for distribution and TileAndFuse, which are the
     // first two tile lists.
+    auto outerDimsPerm = packOp.getOuterDimsPerm();
     for (int i = 0, e = std::min<int>(tileSizesList.size(), 2); i < e; ++i) {
       auto &tileSizes = tileSizesList[i];
       ArrayRef<int64_t> innerTiles = packOp.getStaticInnerTiles();
@@ -1977,6 +1978,8 @@
         LLVM_DEBUG(KD_DBGS() << "Scale # " << pos << " tile size to "
                              << tileSizes[pos] << "\n");
       }
+      if (!outerDimsPerm.empty())
+        applyPermutationToVector(tileSizes, outerDimsPerm);
     }
 
     return WalkResult::advance();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
index 4497093..eeea4e9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
@@ -1698,7 +1698,7 @@
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 1], [8, 1], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 64], [1, 8], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
 //      CHECK: hal.executable.export public @transpose_pack
 // CHECK-SAME:     translation_info = #[[TRANSLATION]]