)]}'
{
  "commit": "ca7e06373768634a9a4a857ce7dd543f299886b2",
  "tree": "e10d26579041fdc9b76baf162e78e7c5e8ad222f",
  "parents": [
    "c6525dd763ae1414b3715b6beb56957e2383a41c"
  ],
  "author": {
    "name": "Lukas Sommer",
    "email": "lukas.sommer@amd.com",
    "time": "Wed May 06 17:18:51 2026 +0200"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Wed May 06 17:18:51 2026 +0200"
  },
  "message": "[VectorDistribute] Lower and distribute `async_dma` (#24299)\n\nPass to distribute and lower `async_dma` operations at the workgroup\nlevel to `amdgpu.gather_to_lds` operations at the thread-level (with\nthreads in each subgroup collaborating).\n\nThe pass shares helpers with the existing GPU pass to distribute\noperations based on layouts, but as the `async_dma` operation does not\nhave `vector` operands or results, lowering and distribution are\nimplemented as a separate pass. The changes to\n`GPUNestedLayoutDistributionPatterns.cpp` are therefore mainly a code\nmove extracting shared helpers to the new\n`GPUNestedLayoutUtils.[h|cpp]`.\n\nThe basic idea of the distribution is to construct a (nested) layout\nthat represents how the data-transfer is split across subgroups and\nthreads to perform the full transfer with direct-to-LDS compatible\noperations. The layout is constructed in stages:\n1. We choose the DMA size for the given target that fulfills the\nrequirements and determine the element tile based on the size of the\ntransfer per thread from the DMA size (`distributeFromInnermost`).\n2. The element tile is given by the number of threads in subgroup\n(`distributeFromInnermost`).\n3. Outer tile is always all-ones.\n4. We distribute the transfer to the configured number of subgroups\n(`distributeFromOutermost`).\n5. Whatever is left after these steps ends up as the batch tile of each\nthread.\n\nOnce we have that layout, we can use the shared helpers for the\nmechanics of distributing the operation.\n\nThe distribution fails if any of the requirements are not met. This is\nmostly a defensive check, the pass inserting the `async_dma` operations\n(will be added in a different PR) should only insert `async_dma`\noperations if the prerequisites can be met with the available DMA sizes\nfor the transfer shape etc. Therefore, the pass also fails if any of the\n`async_dma` operations could not be distributed and lowered.\n\nSwizzling and gather semantics are not part of this PR and will be added\nin follow-up PRs.\n\nThis is part of https://github.com/iree-org/iree/issues/23782.\n\nAssisted-by: Claude Code and Codex\n\n---------\n\nSigned-off-by: Lukas Sommer \u003clukas.sommer@amd.com\u003e",
  "tree_diff": [
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "967a3dcb4b8fbc13e4e879884fbf93f25ca9d893",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/AMDGPULowerAsyncDMA.cpp"
    },
    {
      "type": "modify",
      "old_id": "30ab59ef0a542f3d656607fe85b61aac1de6cf74",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel",
      "new_id": "2fb913b27016011b1bedaf07a68438efd99f0b63",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel"
    },
    {
      "type": "modify",
      "old_id": "38831593f80b90622e57c5225e82056d9d46d0a5",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt",
      "new_id": "b96937a04d04e7b758261d82b24e206dfee4a8f8",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "ccd3c5e867bb3434b9ffc8dd0b78e0666b696133",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUNestedLayoutDistributionPatterns.cpp",
      "new_id": "0c84e50fb2a7d018927fba6f7c38b09839aeac57",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUNestedLayoutDistributionPatterns.cpp"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "1af713d09d2a58aa8f2daf6907b888a081787deb",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUNestedLayoutUtils.cpp"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "0c002a7f834a9864213cdc49c1d379df8eb895e8",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUNestedLayoutUtils.h"
    },
    {
      "type": "modify",
      "old_id": "6c720d3c89249ff31c34c1a3309de62415541d3e",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td",
      "new_id": "f61d5401ed9a17e000a63e6674929ee0f72674aa",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td"
    },
    {
      "type": "modify",
      "old_id": "b233f7ec9391ce86ae566e546f8091c45869f31a",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel",
      "new_id": "8be56b4f8b3a716469d1b9b51d146781eb010966",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel"
    },
    {
      "type": "modify",
      "old_id": "57c163f7b07371ec09060b35c8b4f59e1e921420",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt",
      "new_id": "4d43b4173700ead02e244f2650c44701026ebeb3",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "ee1e91c8c3157804acbd73bb26915a0b16f2f866",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/amdgpu_lower_async_dma.mlir"
    },
    {
      "type": "modify",
      "old_id": "110de3476ffc180b269b7b0ff8b839314479a6f1",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp",
      "new_id": "f4f6e7be3a27ba8a71b5fb87c51a1e05de37760a",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp"
    }
  ]
}
