)]}'
{
  "commit": "c75b6860e6c182f7fcfa0e1aaab4a552b1d12f24",
  "tree": "5ee2df6b15117580c723127b2363d0f34b03a207",
  "parents": [
    "7b9aa284500d1cef0711965ca5b45efd43ce9742"
  ],
  "author": {
    "name": "Zhuoran Yin",
    "email": "zhuoryin@amd.com",
    "time": "Wed Jan 08 13:37:04 2025 -0500"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Wed Jan 08 13:37:04 2025 -0500"
  },
  "message": "[GPU][Codegen] Allowing mfma for narrow problem config sizes (#19615)\n\nThe motivation of this PR is convolution performance for resnet50\r\nconfigs. With this PR (and a few pending ones), conv performance with\r\nigemm pipeline get decent speedup in situation where a standalone\r\ndimension size is smaller than intrinsic size. (Take dispatch 69 as\r\nexample, the select tile m:7, n:512, k:4608 will be rejected from mfma\r\nbecause m tile is smaller than intrinsic size of 16). This happens\r\nbecause previously we are too defensive about when to use intrinsic: in\r\nsituation when alignment is not required, we still enforce mfma to be\r\npicked up only when m/n/k tiles are all larger than intrinsic size.\r\n\r\nWith @nirvedhmeshram\u0027s https://github.com/iree-org/iree/pull/19271 and\r\nhttps://github.com/iree-org/iree/pull/19484, padding is allowed in tile\r\nand fuse matmul and igemm tile and fuse pipelines, it is no longer\r\nnecessary to be as conservative as before. I am therefore getting rid of\r\nthe conditional check that blocks mfma from being picked up.\r\n\r\nThis will impact a few pipelines that use `canTargetIntrinsic()`:\r\n- `LLVMGPUPadAndVectorDistribute` will allow narrow m/n/k dimension\r\nsizes for batch matmul\r\n- In `iree-codegen-rocdl-configuration-pipeline`, will allow narrow\r\nm/n/k dimension sizes for matmul (instead of warp reduction)\r\n\r\n---------\r\n\r\nSigned-off-by: jerryyin \u003czhuoryin@amd.com\u003e",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "8cab5c68924eccdae08520bf3aef04542caf8a77",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUHeuristics.cpp",
      "new_id": "f8e30f31a9613f0e31f5a8173b7bb0dcbc9086fa",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/GPU/GPUHeuristics.cpp"
    },
    {
      "type": "modify",
      "old_id": "d71e7ed459ae3b62bb9db897d51468099579143b",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_gfx942.mlir",
      "new_id": "373b67b04e8fd815c19084a99770e4cad4f864e9",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_gfx942.mlir"
    }
  ]
}