)]}'
{
  "commit": "7247601b1967516ef7fd5cc5ae55abd5ced2397a",
  "tree": "322113cd61df8b4f1c536563a570072b05465438",
  "parents": [
    "ce12fef09b38b20e9251546458f97b6d416c296b"
  ],
  "author": {
    "name": "Max191",
    "email": "44243577+Max191@users.noreply.github.com",
    "time": "Thu Apr 30 16:30:48 2026 -0400"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Thu Apr 30 16:30:48 2026 -0400"
  },
  "message": "[LLVMGPU][ROCDL] Add pass to group global loads for better instruction scheduling (#24247)\n\nAdds LLVMGPUGroupGlobalLoadsPass which moves global loads in the same\nblock to be adjacent to each other when they are separated by pure\naddress computation ops. The pass moves each load along with its\ntransitive dependency chain to be right after the preceding global load.\n\nThis improves performance in situations where LLVM is not able to\nconvert address computation into a single base + constant offset. In\nsuch cases, instruction scheduling can become pessimistic and each\nglobal load needs to be waited on before the next is issued. With this\ninstruction reordering, all global loads are issued together after\naddress computation is completed.\n\nBased on benchmarks with this change alone, we don\u0027t have any cases in\nour suite of kernels that runs into this issue today. However, some\nconvolution shapes run into the issue after the changes in\nhttps://github.com/iree-org/iree/pull/24245, and this PR prevents such\nregressions.\n\nThis is only enabled for ROCDL in this PR, because we don\u0027t have any\ndata points to support adding it to other pipelines yet.\n\n---------\n\nSigned-off-by: Max Dawkins \u003cmax.dawkins@gmail.com\u003e\nCo-authored-by: Claude Opus 4.7 (1M context) \u003cnoreply@anthropic.com\u003e",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "f17fa96ad9aca17ff175f17f34d5646160a3603a",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel",
      "new_id": "b304f6b35625e0195527eb597c64ce80c93d89b3",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel"
    },
    {
      "type": "modify",
      "old_id": "cc8c1255d0a22c1eaa7170b14f0923c978c9fa50",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt",
      "new_id": "bc5b5faf90ed1f4b60e0372bd04aa81f8ae2ff88",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "d6c295fd4e18221b7d695a968a607f9a6a288101",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUGroupGlobalLoads.cpp"
    },
    {
      "type": "modify",
      "old_id": "d68d07bda820bc72fd80675ab6ff84dcacd6a252",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp",
      "new_id": "b8cf60c2c08bc568e547eea6724d16517f74bb38",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp"
    },
    {
      "type": "modify",
      "old_id": "a0217e9a0b3d5f43005df9a3a49bcf493a61f50f",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td",
      "new_id": "82bd866e4716d955122cd8af8786cc0bf67a1827",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td"
    },
    {
      "type": "modify",
      "old_id": "b531490b8472144d8e018a8b888ffc5724faf9dd",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel",
      "new_id": "9f6f8e4f600cb4e9c74fc04d559ae0347e8dd253",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel"
    },
    {
      "type": "modify",
      "old_id": "fc70e250d3a3587446fb315c08c6f83cee4ff2c3",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt",
      "new_id": "8b60996d5a5be02d0100f13b2c25cb1e9dfd3355",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "ee2f3800ea9b5f394107109d5dcdab3d8ef231f7",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_group_global_loads.mlir"
    }
  ]
}
