[Codegen] Add pass to absorb swizzle hints into alloc attributes (#24000)
Part 1/3 of enabling XOR swizzle with pipelining (#23919).
**Overall plan:** `SwizzleHintOp` in the SSA chain blocks both
`memref::multiBuffer` and `scf::pipelineForLoop`. The fix absorbs the
hint into an alloc attribute before pipelining, preserves it through
multi-buffering, then re-inserts hints at leaf users afterward.
**This PR:** Adds `AbsorbSwizzleHintToAllocPass`, which moves swizzle
info from `SwizzleHintOp` to an `iree_codegen.swizzle` attribute on the
defining `memref.alloc` and erases the hint op. The pass is not yet
wired into any pipeline.
Assisted-by: Cursor (Claude)
Signed-off-by: Yu-Zhewen <zhewenyu@amd.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp b/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp
new file mode 100644
index 0000000..486e8cf
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Common/AbsorbSwizzleHintToAlloc.cpp
@@ -0,0 +1,56 @@
+// Copyright 2026 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Common/Passes.h"
+#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/PatternMatch.h"
+
+namespace mlir::iree_compiler {
+
+#define GEN_PASS_DEF_ABSORBSWIZZLEHINTTOALLOCPASS
+#include "iree/compiler/Codegen/Common/Passes.h.inc"
+
+namespace {
+/// Pass implementation built on the base class generated from Passes.td
+/// (`impl::AbsorbSwizzleHintToAllocPassBase`). All of the work is done in
+/// `runOnOperation`.
+struct AbsorbSwizzleHintToAllocPass final
+ : impl::AbsorbSwizzleHintToAllocPassBase<AbsorbSwizzleHintToAllocPass> {
+ using Base::Base;
+ void runOnOperation() override;
+};
+} // namespace
+
+/// Absorbs an `iree_codegen.swizzle_hint` op into an `iree_codegen.swizzle`
+/// attribute on the `memref.alloc` that defines its operand, then replaces the
+/// hint with that operand so the hint no longer sits in the SSA chain seen by
+/// downstream passes (multi-buffering, pipelining).
+///
+/// Emits an error on `hintOp` and returns failure if:
+///   * the operand is not directly defined by a `memref.alloc`, or
+///   * the alloc already carries a different `iree_codegen.swizzle` attribute
+///     (e.g. from another hint on the same alloc); overwriting it would
+///     silently drop one of the two swizzles.
+static LogicalResult
+absorbSwizzleHintToAlloc(RewriterBase &rewriter,
+                         IREE::Codegen::SwizzleHintOp hintOp) {
+  auto allocOp = hintOp.getOperand().getDefiningOp<memref::AllocOp>();
+  if (!allocOp) {
+    return hintOp.emitError()
+           << "expected swizzle_hint operand to be defined by a memref.alloc";
+  }
+
+  constexpr llvm::StringLiteral kSwizzleAttrName = "iree_codegen.swizzle";
+  Attribute swizzle = hintOp.getSwizzleAttr();
+  Attribute existing = allocOp->getAttr(kSwizzleAttrName);
+  if (existing && existing != swizzle) {
+    return hintOp.emitError()
+           << "conflicting swizzle for memref.alloc: already has " << existing
+           << ", new hint has " << swizzle;
+  }
+
+  // Route the in-place attribute change through the rewriter so that any
+  // listener attached to it is notified of the modification.
+  rewriter.modifyOpInPlace(
+      allocOp, [&] { allocOp->setAttr(kSwizzleAttrName, swizzle); });
+  rewriter.replaceOp(hintOp, hintOp.getOperand());
+  return success();
+}
+
+/// Gathers every `iree_codegen.swizzle_hint` nested under the function and
+/// absorbs each one in turn; the pass is marked as failed at the first hint
+/// that cannot be absorbed.
+void AbsorbSwizzleHintToAllocPass::runOnOperation() {
+  FunctionOpInterface func = getOperation();
+
+  // Collect the hints first so the IR is only mutated once the walk is done.
+  SmallVector<IREE::Codegen::SwizzleHintOp> worklist;
+  func.walk([&worklist](IREE::Codegen::SwizzleHintOp op) {
+    worklist.push_back(op);
+  });
+
+  IRRewriter rewriter(func->getContext());
+  for (IREE::Codegen::SwizzleHintOp op : worklist) {
+    if (succeeded(absorbSwizzleHintToAlloc(rewriter, op))) {
+      continue;
+    }
+    signalPassFailure();
+    return;
+  }
+}
+
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
index 176c584..4a8911a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
@@ -73,6 +73,7 @@
iree_compiler_cc_library(
name = "Common",
srcs = [
+ "AbsorbSwizzleHintToAlloc.cpp",
"AddFastMathFlags.cpp",
"BlockDynamicDimensions.cpp",
"BubbleUpOrdinalOps.cpp",
diff --git a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
index 93169e4..fe04f75 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
@@ -66,6 +66,7 @@
"Transforms.h"
"UserConfig.h"
SRCS
+ "AbsorbSwizzleHintToAlloc.cpp"
"AddFastMathFlags.cpp"
"BlockDynamicDimensions.cpp"
"BubbleUpOrdinalOps.cpp"
diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.td b/compiler/src/iree/compiler/Codegen/Common/Passes.td
index 80e326f..17f5c62 100644
--- a/compiler/src/iree/compiler/Codegen/Common/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Common/Passes.td
@@ -19,6 +19,11 @@
"given a floating-point mode.";
}
+def AbsorbSwizzleHintToAllocPass :
+    InterfacePass<"iree-codegen-absorb-swizzle-hint-to-alloc", "mlir::FunctionOpInterface"> {
+  let summary = "Absorbs swizzle_hint ops into attributes on the defining alloc";
+  let description = [{
+    For every `iree_codegen.swizzle_hint` in the function, records the hint's
+    swizzle as an `iree_codegen.swizzle` attribute on the `memref.alloc` that
+    defines the hint's operand, and replaces the hint with that operand.
+
+    The pass fails if a hint's operand is not directly defined by a
+    `memref.alloc`.
+  }];
+}
+
def BlockDynamicDimensionsPass
: Pass<"iree-codegen-block-dynamic-dimensions"> {
let summary = "Expand dynamic dimensions that are known to be multiples of "
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
index d4a2879..e6ddc62 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
@@ -19,6 +19,7 @@
srcs = enforce_glob(
# keep sorted
[
+ "absorb_swizzle_hint_to_alloc.mlir",
"add_fmfs.mlir",
"affinemin_canonicalization.mlir",
"batch_matmuls.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
index 68ef693..0d5889e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -14,6 +14,7 @@
NAME
lit
SRCS
+ "absorb_swizzle_hint_to_alloc.mlir"
"add_fmfs.mlir"
"affinemin_canonicalization.mlir"
"batch_matmuls.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir b/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir
new file mode 100644
index 0000000..3c46587
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Common/test/absorb_swizzle_hint_to_alloc.mlir
@@ -0,0 +1,22 @@
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-absorb-swizzle-hint-to-alloc))" \
+// RUN:   --split-input-file --verify-diagnostics --mlir-print-local-scope %s | FileCheck %s
+
+// The hint results are consumed by the deallocs so that the test also covers
+// rewiring of the hints' users to the allocs.
+func.func @absorb_swizzle_hint_to_alloc() {
+  %alloc0 = memref.alloc() : memref<8192xbf16, #gpu.address_space<workgroup>>
+  %hint0 = iree_codegen.swizzle_hint %alloc0[#iree_codegen.xor_shuffle<128, 8>]
+    : memref<8192xbf16, #gpu.address_space<workgroup>>
+  %alloc1 = memref.alloc() : memref<16384xbf16, #gpu.address_space<workgroup>>
+  %hint1 = iree_codegen.swizzle_hint %alloc1[#iree_codegen.xor_shuffle<256, 16>]
+    : memref<16384xbf16, #gpu.address_space<workgroup>>
+  memref.dealloc %hint0 : memref<8192xbf16, #gpu.address_space<workgroup>>
+  memref.dealloc %hint1 : memref<16384xbf16, #gpu.address_space<workgroup>>
+  return
+}
+
+// CHECK-LABEL: func @absorb_swizzle_hint_to_alloc
+// CHECK-NOT:     iree_codegen.swizzle_hint
+// CHECK:         %[[ALLOC0:.+]] = memref.alloc() {iree_codegen.swizzle = #iree_codegen.xor_shuffle<128, 8>}
+// CHECK-SAME:      : memref<8192xbf16, #gpu.address_space<workgroup>>
+// CHECK-NOT:     iree_codegen.swizzle_hint
+// CHECK:         %[[ALLOC1:.+]] = memref.alloc() {iree_codegen.swizzle = #iree_codegen.xor_shuffle<256, 16>}
+// CHECK-SAME:      : memref<16384xbf16, #gpu.address_space<workgroup>>
+// CHECK-NOT:     iree_codegen.swizzle_hint
+// CHECK:         memref.dealloc %[[ALLOC0]]
+// CHECK:         memref.dealloc %[[ALLOC1]]
+// CHECK:         return
+
+// -----
+
+// The operand of the hint is a function argument rather than the result of a
+// memref.alloc, so the pass must report an error.
+func.func @hint_on_non_alloc(%arg0: memref<8192xbf16, #gpu.address_space<workgroup>>) {
+  // expected-error @+1 {{expected swizzle_hint operand to be defined by a memref.alloc}}
+  %hint = iree_codegen.swizzle_hint %arg0[#iree_codegen.xor_shuffle<128, 8>]
+    : memref<8192xbf16, #gpu.address_space<workgroup>>
+  return
+}