)]}'
{
  "commit": "9b97d80cb2c70fcad9e4a5a5b4f0c97822c28f00",
  "tree": "7e766dc866e80c973a223bf5f6849f1b7ea515cb",
  "parents": [
    "8ad7a71b9e81276066a8636ed9947fb432c4829e"
  ],
  "author": {
    "name": "bjacob",
    "email": "benoitjacob@google.com",
    "time": "Tue Sep 20 13:25:34 2022 -0400"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Tue Sep 20 17:25:34 2022 +0000"
  },
  "message": "mmt4d i8mm kernel register fix (#10476)\n\nThis comes on top of #10475, the only actual diff here is in\r\nruntime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_tile_arm_64_i8mm.S.\r\n\r\nI noticed that google benchmark was incorrectly running only 1000\r\niterations of this benchmark. Debugging, its floating point code\r\ncomputing elapsed time got wrong arithmetic in determining whether it\r\nwas time to cut the benchmark short. Eventually I realized my mistake:\r\nthis kernel uses all 32 SIMD registers, including v8--v15 whose bottom\r\n64bit are callee-saved. We were overwriting the google-benchmark code\u0027s\r\ntimestamp value stored in one of those!\r\n\r\nThe standard fix would be to push these 8 x 64bit to stack an pop at the\r\nend, which could be done in a total of 4 store-pair + 4 load-pair \u003d 8\r\ninstructions, but we save those instructions by instead just not using\r\nv8--v15 at all; we didn\u0027t really need them.",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "108a61b5b2fd49ee8acfdfc28c2f3ae9dcc23e00",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_tile_arm_64_i8mm.S",
      "new_id": "9a1700f1b5e671b8a1c4849f4005ae05f770e29f",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_tile_arm_64_i8mm.S"
    }
  ]
}
