)]}'
{
  "commit": "691ab414b2cfa19d45797e1ff7871d536b4e1bd0",
  "tree": "275e3c1c3b323f39c1a292f750b6d7968f6bf513",
  "parents": [
    "c3d72e5d37a1de82b7a5c6a1a8ddd160cde68e3a"
  ],
  "author": {
    "name": "bjacob",
    "email": "benoitjacob@google.com",
    "time": "Mon Nov 14 17:11:52 2022 +0000"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Mon Nov 14 12:11:52 2022 -0500"
  },
  "message": "pack ukernel: optimized ARM64 code, benchmark,+refactorings (#11134)\n\nBrings a decently optimized ARM64 implementation of the `iree_uk_pack`\r\nmicrokernel.\r\n\r\nIt\u0027s not super optimized --- not even any assembly code. Just some C\r\nwith intrinsics, aiming at a trade-off with simplicity,\r\ngeneralizability, code size. We even have a naive for loop\r\n`iree_uk_memcpy`, which thanks to `restrict` the compiler is able to\r\nlift to a true `memcpy` while preserving compile-time memcpy size, and\r\nin some cases we just use that.\r\n\r\nAnother thought tilting us towards C is instrumentability: pack is the\r\nkind of place that\u0027s potentially relevant to sanitize (asan, tsan...) so\r\nit\u0027s kind of interesting to keep it in C.\r\n\r\nThe PR is made bigger because this was another instance where\r\ngeneralizing from having 1 microkernel (mmt4d) to more than 1, required\r\nsome groundwork.\r\n\r\n[Performance\r\ncharts](https://docs.google.com/spreadsheets/d/1hK39a9snA_P_e0nXKFyUw-hTtOmp3t2qdEUcK_kFZ64/edit?usp\u003dsharing\u0026resourcekey\u003d0-hQznpmTatcIsl80mONdfmQ)\r\n. Some cases are fairly close to memcpy, other cases are 1 order of\r\nmagnitude slower. Still probably good enough to not dominate profiles\r\n(by contrast, naive for loops before optimization could be 2 to 3 orders\r\nof magnitude slower than memcpy, and that was more clearly a problem).\r\n\r\nCode size on ARM64, release: total 3104 bytes.\r\n\r\n```\r\n0000000000000924 t iree_uk_pack\r\n0000000000000004 t iree_uk_pack_select_tile_func_arch\r\n0000000000000220 t iree_uk_pack_select_tile_func_arm_64\r\n0000000000000032 t iree_uk_pack_select_tile_func_generic\r\n0000000000000260 t iree_uk_pack_tile_8x1_x32_arm_64_direct\r\n0000000000000036 t iree_uk_pack_tile_8x1_x32_arm_64_transpose\r\n0000000000000372 t iree_uk_pack_tile_8x1_x8_arm_64_direct\r\n0000000000000136 t iree_uk_pack_tile_8x1_x8_arm_64_transpose\r\n0000000000000248 t iree_uk_pack_tile_8x4_x8_arm_64_direct\r\n0000000000000080 t iree_uk_pack_tile_8x4_x8_arm_64_transpose\r\n0000000000000104 t iree_uk_pack_tile_8x8_x8_arm_64_direct\r\n0000000000000196 t iree_uk_pack_tile_8x8_x8_arm_64_transpose\r\n0000000000000212 t iree_uk_pack_tile_generic_direct\r\n0000000000000280 t iree_uk_pack_tile_generic_transpose\r\n```",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "6ceb4034a1238c433c71af5605b0c5b10a2061e2",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/BUILD",
      "new_id": "42dbb011050959ccdf70f22fee66f88b5b5eba57",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/BUILD"
    },
    {
      "type": "modify",
      "old_id": "c28c6f54aa17dd692776f3500667351cd2d66d60",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/CMakeLists.txt",
      "new_id": "bbb4b9649cbe85c060052d902a9b0f69afe896d4",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "b5814ce9cf4765fa5f5f6b2dd441deb8a7bafcbe",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/BUILD",
      "new_id": "d6aa4d8aaf5f8edc4fa891df64fa93aefc177996",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/BUILD"
    },
    {
      "type": "modify",
      "old_id": "7e042f6db0312b62b2cc8d6755aabe3e3b325854",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/CMakeLists.txt",
      "new_id": "d39b48088c8e0f3318461e33bb287f415bdecbd2",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "cf55b8d77a1b357dc31c6580b684f2fda5bf9212",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD",
      "new_id": "142f4145d8b934b91f9dc2f585c57f3d73588b7c",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD"
    },
    {
      "type": "modify",
      "old_id": "f10e4e871f8f7034df44bcc18cd0de6722c7c67b",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt",
      "new_id": "7bb6ad9d8f97b35c0e718938f63d39c76dd31f79",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt"
    },
    {
      "type": "rename",
      "old_id": "f861f19af8c5237e7ed4397130971dd6bcee2e43",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_select_tile_arm_64.c",
      "new_id": "d3a6777ce0776847688dcd4bfb1234edf47f2aad",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64.c",
      "score": 87
    },
    {
      "type": "rename",
      "old_id": "c2e1abb36f91e1d752f253ce69b9e03d90490c32",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_select_tile_arm_64.h",
      "new_id": "39e331c9022d4fc79683bce147574f8b51322e71",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64.h",
      "score": 75
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "5da3cb50e4a7054932f08a4ac193930a08bdcd78",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_tile_arm_64.h"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "9c3e3f409adff91d7dd6a379b379bfe941134ff1",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/pack_arm_64.c"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "d61cc9921ba4b37aae5e53d431e5dcd74d015021",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/pack_arm_64.h"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "f3db0b0396464576d556ee26824d5b9b5dfb7a15",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/pack_tile_arm_64.c"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "ac6e3857d92edd05e0c0ceaeb295a388f99f08df",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/arm_64/pack_tile_arm_64.h"
    },
    {
      "type": "rename",
      "old_id": "b1757c798306a6a4d8c9cf4d1f1473d0d93fe6ac",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/mmt4d_select_tile_arch.c",
      "new_id": "fee4cecf32abc9032d9beb1265744dd64b909fce",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/mmt4d_arch.c",
      "score": 77
    },
    {
      "type": "rename",
      "old_id": "5502fcbac8d18587b9e76d604dacf72de48d99cc",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/arch/mmt4d_select_tile_arch.h",
      "new_id": "996b4d4cf22daa1432959612847cf041003b77f3",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/mmt4d_arch.h",
      "score": 77
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "265e9ad8a8ffc1edb6f1873d8bec4a0958e56c4e",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/pack_arch.c"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "16119b01f26159b9a50708de09eeee02bb78d293",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/arch/pack_arch.h"
    },
    {
      "type": "modify",
      "old_id": "307b91ec21c8b69353586e67a7d42c961581f8b9",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/common.c",
      "new_id": "940f363f21a36e4a57f4f8267452137d0b458fed",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/common.c"
    },
    {
      "type": "modify",
      "old_id": "90a437fa34d1d460ee04435186a7d8d9e2c9001a",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/common.h",
      "new_id": "5a04e3bf48f93c5617437863ba2f4fb517bf2306",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/common.h"
    },
    {
      "type": "modify",
      "old_id": "08e75f26e631478828e45c61e63f6c56fd16e13b",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/mmt4d.c",
      "new_id": "1e5cb9dd2743c40d618a6db48317a7a9cc29b0b6",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/mmt4d.c"
    },
    {
      "type": "rename",
      "old_id": "41f13b668d3a3536fc0811b0dc8e5ffcce467a82",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/mmt4d_select_tile_generic.c",
      "new_id": "3c4c38e9ce9bdfbe132e6fe1767f87eab6cdbf3e",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/mmt4d_generic.c",
      "score": 97
    },
    {
      "type": "rename",
      "old_id": "848edf7a87d0ac4b116c8db5cd2b3dff8a7af045",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/mmt4d_select_tile_generic.h",
      "new_id": "848edf7a87d0ac4b116c8db5cd2b3dff8a7af045",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/mmt4d_generic.h",
      "score": 100
    },
    {
      "type": "modify",
      "old_id": "821deceef04d7ba60b164a39430e4ce1cb76c5fe",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/mmt4d_types.h",
      "new_id": "2c2397b89a9ebbc902d23064275a32b2038cc0f3",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/mmt4d_types.h"
    },
    {
      "type": "modify",
      "old_id": "96c623a58edd7db2f9a8216a1dc42e94c28060dc",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/pack.c",
      "new_id": "101020a92f5a2f648accd3b38d73d561dadd9af4",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/pack.c"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "5e40ae796e4f28cb0f13e3faa254b7498d4037bf",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/pack_generic.c"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "50bee3e01ed0953e33cfc0c8f79bf72ecc30d073",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/pack_generic.h"
    },
    {
      "type": "modify",
      "old_id": "dac535372411cdb3782e1753eb245d0aa348d15b",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/pack_types.h",
      "new_id": "fdc77904cc921a71777b8fe6cb6a4c1eba83400c",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/pack_types.h"
    },
    {
      "type": "modify",
      "old_id": "4de5fb0be06e29b8f24227c67cef2a672a5c7b3c",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/BUILD",
      "new_id": "1f7da3eb645f6a9a7f20ce527231baadad16fcac",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/BUILD"
    },
    {
      "type": "modify",
      "old_id": "5273087dbbfc768877f5fcfbb10a41f57a5bc270",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/CMakeLists.txt",
      "new_id": "fe16eab02f382eec9199817158bf5e5efc99c3f0",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/CMakeLists.txt"
    },
    {
      "type": "modify",
      "old_id": "7c8aea4162ceca3c204c313db129b9c921885c8c",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/mmt4d_benchmark.c",
      "new_id": "208d527b07dfb4b71d59f65133b6a77cc4ded283",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/mmt4d_benchmark.c"
    },
    {
      "type": "modify",
      "old_id": "569c9953710c412765d0eb576958c7d31761437d",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/mmt4d_test.cc",
      "new_id": "4193340ab2f9fb9280a88b11fb0f91372df4d166",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/mmt4d_test.cc"
    },
    {
      "type": "add",
      "old_id": "0000000000000000000000000000000000000000",
      "old_mode": 0,
      "old_path": "/dev/null",
      "new_id": "e22eccb4d0b8a3392ec6c6e0a6be96ea0f445904",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/pack_benchmark.c"
    },
    {
      "type": "modify",
      "old_id": "fc3da46de7d33381f0212b23698f6d7db1b932f2",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/pack_test.cc",
      "new_id": "7ccc944fba53f52ca626da577bac98f53ff4b784",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/pack_test.cc"
    },
    {
      "type": "modify",
      "old_id": "a6c33335dbf64eebcedcde43aeb512521f99e800",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/ukernel_test_utils.cc",
      "new_id": "e7e77d4f582254c252894bf16bc450ab9f16252e",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/ukernel_test_utils.cc"
    },
    {
      "type": "modify",
      "old_id": "ff494154257ebc6e464b5a2e3b7454be6f976cc1",
      "old_mode": 33188,
      "old_path": "runtime/src/iree/builtins/ukernel/tools/ukernel_test_utils.h",
      "new_id": "429910e054488ff356ee1608751f5f5bdbcd0415",
      "new_mode": 33188,
      "new_path": "runtime/src/iree/builtins/ukernel/tools/ukernel_test_utils.h"
    }
  ]
}
