)]}'
{
  "commit": "c3fae2f7443908e91a00eb65a4f330ad1f71e63f",
  "tree": "0e5c15c8e3d9833158f02cdad8cc4d80c8c0b6c3",
  "parents": [
    "4d20b82812951fd971f930f43167c11e46da1c25"
  ],
  "author": {
    "name": "Max191",
    "email": "44243577+Max191@users.noreply.github.com",
    "time": "Thu Oct 24 08:36:54 2024 -0700"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Thu Oct 24 11:36:54 2024 -0400"
  },
  "message": "[LLVMGPU] Use forall workgroup distribution in TileAndFuse pipeline (#18565)\n\nThis switches the TileAndFuse pipeline to use scf.forall distribution.\r\nUsing scf.forall distribution also requires some changes to the pass\r\nordering in the TileAndFuse pipeline, which is also handled by this PR:\r\n1. The main difference is that PackToIntrinsics happens before workgroup\r\ndistribution. Otherwise, collapse_shape ops can end up at the end of the\r\nworkgroup forall, and an extra buffer is created.\r\n2. Pack decomposition is now staged, with packs/unpacks at the function\r\nboundaries being decomposed early before workgroup decomposition, and\r\nthe rest being decomposed after reduction tiling as before. This\r\nprevents unpacks being fused into the workgroup forall and causing the\r\nsame problem as in (1).\r\n3. `ConcretizeMmaShapes` now runs before workgroup tiling as well,\r\nso the resulting collapse_shape on the multi_mma op result can be\r\npropagated to the function boundary before any tiling. This is also to\r\navoid the same problem as in (1).\r\n\r\nThe lowering configs on the MMA path have also changed, since they now\r\nneed to account for inner tile sizes of packing.\r\n\r\ndepends on https://github.com/iree-org/iree/pull/18852\r\n\r\nSigned-off-by: Max Dawkins \u003cmax.dawkins@gmail.com\u003e",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "611a87454ecfeafb1413e7fa1120f62c78141413",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp",
      "new_id": "ca23b0ca6e064b82b9607023c6c545f9b8fb3eb4",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp"
    },
    {
      "type": "modify",
      "old_id": "e8c3de89f80e38f3ff90496859b57522dffba8bf",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp",
      "new_id": "76b1af3204be5ff15654ab3c533d9a70724f0641",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp"
    },
    {
      "type": "modify",
      "old_id": "53952e953549ff64d6e1a29bf09cac630d726785",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir",
      "new_id": "b98e85a797135bc221d78c7a1b53c96f0a14c9af",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir"
    },
    {
      "type": "modify",
      "old_id": "0dc8b0f245a5277e46e037f2e66916db91f8d070",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir",
      "new_id": "912acf310b260f023c558e26e08dd96f76fd69ae",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir"
    },
    {
      "type": "modify",
      "old_id": "5cc0b705419830bb8b4bf8d7d76772b63700fb35",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir",
      "new_id": "d57d1631bd77d6e3b0a06900b68bee28565009f2",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir"
    }
  ]
}