{
  "commit": "ea01b8ae40ea6cff36e80bb0cfd9c5f85312d6b9",
  "tree": "af448a90bff857e4910912c76324d60ee12826c9",
  "parents": [
    "fc06a5d009c4068b3ef06b7d79fd9a97e879620a"
  ],
  "author": {
    "name": "Alan Li",
    "email": "me@alanli.org",
    "time": "Wed Jan 28 19:03:56 2026 -0500"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Wed Jan 28 19:03:56 2026 -0500"
  },
  "message": "[AMDGPU][LDS] Support linearized DMA for small innermost dimensions (#23056)\n\nWhen the linalg.copy output comes from tensor.empty(), the\n`AMDGPULowerCoalescedDMAToGatherLDS` pass can linearize the destination\nmemref.\n\nThis patch allows coalesced DMA for tensors with small innermost\ndimensions (e.g., `128x16xf32` where `16 \u003c minElementsPerTransfer\u003d64`)\nby linearizing the tensor and using total elements instead of innermost\ndimension for the size check.\n\nThis change gates the optimization: The change in\nGPUConvertToCoalescedDMA simply allows copies with small innermost\ndimensions to proceed when linearization is safe, instead of rejecting\nthem outright.",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "8d4f0fad59a65e8751474efdfbd64e51b1810b2a",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUConvertToCoalescedDMA.cpp",
      "new_id": "ec94c3265fa811549a3dcf8f096d92d215f51b38",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUConvertToCoalescedDMA.cpp"
    },
    {
      "type": "modify",
      "old_id": "e9a35e6b1212e2648de57f273b21b48c21f43eb0",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_convert_to_coalesced_dma.mlir",
      "new_id": "bb52bb47907c9980c3f063a1789079b60af9a1f3",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_convert_to_coalesced_dma.mlir"
    }
  ]
}
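
The relaxed size check described in the commit message can be sketched as follows. This is a minimal C++ sketch, not the actual IREE pass code: the function name `qualifiesForCoalescedDMA` and the `canLinearize` flag are hypothetical stand-ins for the pass's real linearization-safety test; only the MLIR type APIs (`MemRefType`, `hasStaticShape`, `getShape`, `getNumElements`) are real.

#include "mlir/IR/BuiltinTypes.h"

using namespace mlir;

// Hypothetical sketch of the relaxed gating check. `canLinearize` stands
// in for whatever the pass uses to decide that the destination memref
// (e.g., one produced by tensor.empty()) can be safely linearized.
static bool qualifiesForCoalescedDMA(MemRefType dstType,
                                     int64_t minElementsPerTransfer,
                                     bool canLinearize) {
  if (!dstType.hasStaticShape() || dstType.getRank() == 0)
    return false;
  // Previous behavior: the innermost dimension alone had to be large
  // enough, so 128x16xf32 was rejected because 16 < 64.
  if (dstType.getShape().back() >= minElementsPerTransfer)
    return true;
  // New behavior: when linearization is safe, compare the total element
  // count instead, so 128x16 = 2048 elements qualifies.
  return canLinearize &&
         dstType.getNumElements() >= minElementsPerTransfer;
}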
