)]}'
{
  "commit": "9b634df35b1c55ab085ba6a4d769582d2033f768",
  "tree": "c5d42676a25deb00bb0a5deff0e4a2251a906c64",
  "parents": [
    "fb363f690cf903df27c73483dd08c1d26f28b3c7"
  ],
  "author": {
    "name": "Ben Vanik",
    "email": "ben.vanik@gmail.com",
    "time": "Tue Apr 28 11:45:38 2026 -0700"
  },
  "committer": {
    "name": "Ben Vanik",
    "email": "ben.vanik@gmail.com",
    "time": "Wed Apr 29 13:46:12 2026 -0700"
  },
  "message": "[HAL/AMDGPU] Split kernarg benchmark counters\n\nReport command-buffer kernarg accounting with the same distinction the replay path actually cares about: logical payload bytes, prepublished storage span, and queue-ring reserved bytes. The old queue_kernarg_bytes counter looked like the hot path had no kernarg cost for zero-binding dispatches, even though queue replay still reserves at least one 64-byte kernarg block per non-prepublished dispatch.\n\nKeep the accumulator grouped by representation concept instead of growing another run of similarly prefixed locals. The benchmark walk remains cold instrumentation after the measured loop; command-buffer recording, finalization, and block-processor replay are unchanged.\n",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "b4250a729a409e155284d9055d3decfdfbb68352",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/hal/drivers/amdgpu/util/queue_benchmark.cc",
      "new_id": "a1d42f3096ca18954a0bab556c419a3783f5dabd",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/hal/drivers/amdgpu/util/queue_benchmark.cc"
    }
  ]
}
