)]}'
{
  "commit": "c4da71ca6bf0ea7d45fe1453e76bd20a17ea26dd",
  "tree": "d7dc2e69c266d5f18a00a85811effbbc6f2660eb",
  "parents": [
    "fac9d3d9d9b1ea55757c3ae94e9b03ff50b6d4f0"
  ],
  "author": {
    "name": "Benoit Jacob",
    "email": "jacob.benoit.1@gmail.com",
    "time": "Thu May 07 16:28:45 2026 -0400"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Thu May 07 20:28:45 2026 +0000"
  },
  "message": "[Codegen][CPU] Add a type-polymorphic generic-scalar MMA fallback. (#24389)\n\nAdds two new `MMAIntrinsic` values, `MMA_GENERIC_SCALAR_1x1x1_REG8` and\n`_REG16`, that the data-tiling cost model picks when no element-type-\nspecific intrinsic on the target supports the matmul\u0027s (LHS, RHS, ACC)\ntypes. This intentionally breaks the \"an MMAIntrinsic enum value pins\ndown a specific element-type triple\" invariant in exchange for not\nhaving to add one enum value per supported triple. Element types live on\nnew `DataTiledMMAAttr.{lhs,rhs,acc}_type` parameters, populated by the\ncost model only when the chosen intrinsic is one of the polymorphic\nvariants.\n\nThe cost model picks `_REG16` on 64-bit ISAs (x86_64, AArch64, RISC-V)\nand `_REG8` on 32-bit ISAs. The number is a register-budget for the\nunroll heuristic — one element of any width occupies one register, but\nthe architectural register file the lowering ends up in (GPR or SIMD-\nscalar lane) is up to LLVM. The budget is encoded in the low byte of the\nenum value, so `chooseUnrolling` can read it back.\n\nSince the intrinsic is 1×1×1, the operand tiles after `intrinsics_m` /\n`intrinsics_n` / `intrinsics_k` are simple row-major (M, K) / (N, K) /\n(M, N) — `linalg.mmt4d`-shaped.\n`DataTiledMMAAttr::buildUnderlyingOperations` therefore short-circuits\nthe swizzle/distribute pipeline for these intrinsics and emits a single\n`vector.contract` directly, with `arith.extf` / `arith.extsi` widening\nnarrow LHS/RHS to ACC\u0027s element type. For sub-byte LHS/RHS types\n`chooseUnrolling` also picks the smallest power-of-two `intrinsics_k`\nsuch that K*lhsBits and K*rhsBits are byte-aligned (e.g. K\u003d2 for i4/f4,\nK\u003d4 for f6, K\u003d8 for i1).\n\nProgress towards #24323",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "7f07eaccb5763ac5e926df85d91ad08e34ad7577",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_aarch64.mlir",
      "new_id": "a4efc1bfb7318b6b15adb3f494f17dfa3840c5e8",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_aarch64.mlir"
    },
    {
      "type": "modify",
      "old_id": "179cb74148e3963446348974a117cebe4e0e661f",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_x86_64.mlir",
      "new_id": "4326e7dc5922ad774aba91599dd8ca3c85cae745",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_x86_64.mlir"
    },
    {
      "type": "modify",
      "old_id": "ad5b6b44cf3617e05420031713d27260fff2d1c2",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUAttrs.cpp",
      "new_id": "ea4a2e1a7dbf2632b330e8cc990f6ca42a10a814",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUAttrs.cpp"
    },
    {
      "type": "modify",
      "old_id": "80548de5b85be6754166f8e1d33d56551dcc3b59",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUAttrs.td",
      "new_id": "3845d9f3eab447042fcd3967d6d9983a16bb5f46",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUAttrs.td"
    },
    {
      "type": "modify",
      "old_id": "b88c59ada90c2fe5a32030728f30d25ee8982c19",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUEnums.td",
      "new_id": "184060d52c08c854f8d87b1e3ebeda5e5972dbd6",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUEnums.td"
    },
    {
      "type": "modify",
      "old_id": "787b726608e3f8bfb0a4b1135a31c776c0f97147",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUTypes.h",
      "new_id": "8892895f04ca1e5d2538e80fdb37a8ac75eac0cb",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/IREECPUTypes.h"
    },
    {
      "type": "modify",
      "old_id": "1ccb6a703f27ffc48c8565af912ec8a16fb2deb2",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/lower_inner_tiled.mlir",
      "new_id": "6ab4842f49c17d064b56e0ff3f296bf7c2d26557",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/Dialect/CPU/IR/test/lower_inner_tiled.mlir"
    },
    {
      "type": "modify",
      "old_id": "8561f3cae94926e56aeb6215fe3f1e972636fead",
      "old_mode": 33188,
      "old_path": "compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp",
      "new_id": "06d0f9c02e82f7a6bbe4165d7f14707edc79fff8",
      "new_mode": 33188,
      "new_path": "compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp"
    }
  ]
}
