[Codegen] Add pass to absorb swizzle hints into alloc attributes (#24000)

Part 1/3 of enabling XOR swizzle with pipelining (#23919).

**Overall plan:** `SwizzleHintOp` in the SSA chain blocks both
`memref::multiBuffer` and `scf::pipelineForLoop`. The fix absorbs the
hint into an alloc attribute before pipelining, preserves it through
multi-buffering, then re-inserts hints at leaf users afterward.

**This PR:** Adds `AbsorbSwizzleHintToAllocPass`, which moves swizzle
info from `SwizzleHintOp` to an `iree_codegen.swizzle` attribute on the
defining `memref.alloc` and erases the hint op. The pass is not yet
wired into any pipeline.

Assisted-by: Cursor (Claude)

Signed-off-by: Yu-Zhewen <zhewenyu@amd.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp b/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp
new file mode 100644
index 0000000..486e8cf
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp
@@ -0,0 +1,56 @@
+// Copyright 2026 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Common/Passes.h"
+#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/PatternMatch.h"
+
+namespace mlir::iree_compiler {
+
+#define GEN_PASS_DEF_ABSORBSWIZZLEHINTTOALLOCPASS
+#include "iree/compiler/Codegen/Common/Passes.h.inc"
+
+namespace {
+struct AbsorbSwizzleHintToAllocPass final
+    : impl::AbsorbSwizzleHintToAllocPassBase<AbsorbSwizzleHintToAllocPass> {
+  using Base::Base; // Inherit the constructors of the generated pass base.
+  void runOnOperation() override; // Defined out of line, after the helper.
+};
+} // namespace
+
+/// Absorbs the swizzle of `hintOp` into an `iree_codegen.swizzle` attribute on
+/// the defining `memref.alloc` (errors if there is none), then erases the hint
+/// so downstream passes (multi-buffering, pipelining) can operate properly.
+static LogicalResult
+absorbSwizzleHintToAlloc(RewriterBase &rewriter,
+                         IREE::Codegen::SwizzleHintOp hintOp) {
+  if (auto allocOp = hintOp.getOperand().getDefiningOp<memref::AllocOp>()) {
+    rewriter.modifyOpInPlace(allocOp, [&] {
+      allocOp->setAttr("iree_codegen.swizzle", hintOp.getSwizzleAttr());
+    });
+    rewriter.replaceOp(hintOp, hintOp.getOperand());
+    return success();
+  }
+  return hintOp.emitError()
+         << "expected swizzle_hint operand to be defined by a memref.alloc";
+}
+
+void AbsorbSwizzleHintToAllocPass::runOnOperation() {
+  using HintOp = IREE::Codegen::SwizzleHintOp;
+  FunctionOpInterface func = getOperation();
+  // Snapshot every hint first; the rewrites below erase them one by one.
+  SmallVector<HintOp> pending;
+  func.walk([&pending](HintOp op) { pending.push_back(op); });
+  IRRewriter rewriter(func->getContext());
+  for (HintOp op : pending) {
+    if (failed(absorbSwizzleHintToAlloc(rewriter, op))) {
+      return signalPassFailure();
+    }
+  }
+}
+
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
index 176c584..4a8911a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
@@ -73,6 +73,7 @@
 iree_compiler_cc_library(
     name = "Common",
     srcs = [
+        "AbsorbSwizzleHintToAlloc.cpp",
         "AddFastMathFlags.cpp",
         "BlockDynamicDimensions.cpp",
         "BubbleUpOrdinalOps.cpp",
diff --git a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
index 93169e4..fe04f75 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
@@ -66,6 +66,7 @@
     "Transforms.h"
     "UserConfig.h"
   SRCS
+    "AbsorbSwizzleHintToAlloc.cpp"
     "AddFastMathFlags.cpp"
     "BlockDynamicDimensions.cpp"
     "BubbleUpOrdinalOps.cpp"
diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.td b/compiler/src/iree/compiler/Codegen/Common/Passes.td
index 80e326f..17f5c62 100644
--- a/compiler/src/iree/compiler/Codegen/Common/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Common/Passes.td
@@ -19,6 +19,11 @@
                 "given a floating-point mode.";
 }
 
+def AbsorbSwizzleHintToAllocPass
+    : InterfacePass<"iree-codegen-absorb-swizzle-hint-to-alloc", "mlir::FunctionOpInterface"> {
+  let summary = "Absorbs swizzle_hint ops into attributes on the defining alloc";
+}
+
 def BlockDynamicDimensionsPass
     : Pass<"iree-codegen-block-dynamic-dimensions"> {
   let summary = "Expand dynamic dimensions that are known to be multiples of "
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
index d4a2879..e6ddc62 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
@@ -19,6 +19,7 @@
     srcs = enforce_glob(
         # keep sorted
         [
+            "absorb_swizzle_hint_to_alloc.mlir",
             "add_fmfs.mlir",
             "affinemin_canonicalization.mlir",
             "batch_matmuls.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
index 68ef693..0d5889e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -14,6 +14,7 @@
   NAME
     lit
   SRCS
+    "absorb_swizzle_hint_to_alloc.mlir"
     "add_fmfs.mlir"
     "affinemin_canonicalization.mlir"
     "batch_matmuls.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir b/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir
new file mode 100644
index 0000000..3c46587
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir
@@ -0,0 +1,22 @@
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-absorb-swizzle-hint-to-alloc))" \
+// RUN:   --split-input-file --mlir-print-local-scope %s | FileCheck %s
+
+func.func @absorb_swizzle_hint_to_alloc() { // Two allocs, each with its own hint.
+  %alloc0 = memref.alloc() : memref<8192xbf16, #gpu.address_space<workgroup>>
+  %hint0 = iree_codegen.swizzle_hint %alloc0[#iree_codegen.xor_shuffle<128, 8>]
+    : memref<8192xbf16, #gpu.address_space<workgroup>>
+  %alloc1 = memref.alloc() : memref<16384xbf16, #gpu.address_space<workgroup>>
+  %hint1 = iree_codegen.swizzle_hint %alloc1[#iree_codegen.xor_shuffle<256, 16>]
+    : memref<16384xbf16, #gpu.address_space<workgroup>>
+  memref.dealloc %alloc0 : memref<8192xbf16, #gpu.address_space<workgroup>> // Uses the alloc directly; %hint0 has no users.
+  memref.dealloc %alloc1 : memref<16384xbf16, #gpu.address_space<workgroup>> // Uses the alloc directly; %hint1 has no users.
+  return
+}
+
+// CHECK-LABEL: func @absorb_swizzle_hint_to_alloc
+//   CHECK-DAG:   memref.alloc() {iree_codegen.swizzle = #iree_codegen.xor_shuffle<128, 8>}
+//  CHECK-SAME:     : memref<8192xbf16, #gpu.address_space<workgroup>>
+//   CHECK-DAG:   memref.alloc() {iree_codegen.swizzle = #iree_codegen.xor_shuffle<256, 16>}
+//  CHECK-SAME:     : memref<16384xbf16, #gpu.address_space<workgroup>>
+//   CHECK-NOT:   iree_codegen.swizzle_hint
+//       CHECK:   return