)]}'
{
  "commit": "9e98bcdbee19371e04396fd6cfbbc3ab7d4f9930",
  "tree": "44082ff396ff2ce6d69f0e5dcd42f9715d1fa275",
  "parents": [
    "3944dbb147477cc5a5d65579437a6d3ca1b7442a"
  ],
  "author": {
    "name": "bjacob",
    "email": "benoitjacob@google.com",
    "time": "Mon Feb 07 23:04:28 2022 -0500"
  },
  "committer": {
    "name": "GitHub",
    "email": "noreply@github.com",
    "time": "Mon Feb 07 20:04:28 2022 -0800"
  },
  "message": "Generalize the asm kernel pattern. (#8248)\n\nThis makes MMTCustomKernelPattern generic in what kernel it\u0027s\r\ngenerating, in a way that should suffice for the next few kernels that\r\nwe need to add (first motivation: float32/aarch64).\r\n\r\nMMTCustomKernelPattern now takes the kernel-specific details from a\r\nMMTKernel struct, including the asm code and information about the shape\r\nand data types. The inline asm constraints string becomes\r\nauto-generated.\r\n\r\nAll this automation had the effect of making it \"too magical\" for the\r\ninline asm to continue to refer to its register operands by fixed\r\nindices like \"$12\" as the llvm inline asm syntax allows, so we introduce\r\na pre-processing step allowing us to write kernels using placeholders\r\nlike \"$(lhs:0)\" for the 0-th register of the LHS, and that gets\r\n\"allocated\" as the pre-processed code and the constraints string get\r\nco-generated.\r\n\r\nThis makes for a self-contained way to describe kernels. The current\r\naarch64 +dotprod i8 kernel becomes:\r\n\r\n```\r\n// i8*i8-\u003ei32 kernel for Aarch64 NEON +dotprod\r\nMMTKernel MMTKernel_8x4x8_i8i8i32_Aarch64Dotprod_InlineAsm() {\r\n  MMTKernel kernel;\r\n  kernel.arch \u003d CustomKernelTargetArch::Aarch64;\r\n  kernel.m0 \u003d 8;\r\n  kernel.k0 \u003d 4;\r\n  kernel.n0 \u003d 8;\r\n  kernel.lhsType \u003d MMTKernel::ScalarType::I8;\r\n  kernel.rhsType \u003d MMTKernel::ScalarType::I8;\r\n  kernel.accType \u003d MMTKernel::ScalarType::I32;\r\n  kernel.registerBitWidth \u003d PowerOfTwo(128);\r\n  kernel.implAsm \u003d R\"ASM(\r\n      sdot $(acc:0).4s, $(rhs:0).16b, $(lhs:0).4b[0]\r\n      sdot $(acc:1).4s, $(rhs:1).16b, $(lhs:0).4b[0]\r\n      sdot $(acc:2).4s, $(rhs:0).16b, $(lhs:0).4b[1]\r\n      sdot $(acc:3).4s, $(rhs:1).16b, $(lhs:0).4b[1]\r\n      sdot $(acc:4).4s, $(rhs:0).16b, $(lhs:0).4b[2]\r\n      sdot $(acc:5).4s, $(rhs:1).16b, $(lhs:0).4b[2]\r\n      sdot $(acc:6).4s, $(rhs:0).16b, $(lhs:0).4b[3]\r\n      sdot $(acc:7).4s, $(rhs:1).16b, $(lhs:0).4b[3]\r\n      sdot $(acc:8).4s, $(rhs:0).16b, $(lhs:1).4b[0]\r\n      sdot $(acc:9).4s, $(rhs:1).16b, $(lhs:1).4b[0]\r\n      sdot $(acc:10).4s, $(rhs:0).16b, $(lhs:1).4b[1]\r\n      sdot $(acc:11).4s, $(rhs:1).16b, $(lhs:1).4b[1]\r\n      sdot $(acc:12).4s, $(rhs:0).16b, $(lhs:1).4b[2]\r\n      sdot $(acc:13).4s, $(rhs:1).16b, $(lhs:1).4b[2]\r\n      sdot $(acc:14).4s, $(rhs:0).16b, $(lhs:1).4b[3]\r\n      sdot $(acc:15).4s, $(rhs:1).16b, $(lhs:1).4b[3]\r\n    )ASM\";\r\n  return kernel;\r\n}\r\n```",
  "tree_diff": [
    {
      "type": "modify",
      "old_id": "eb1d3cc6b259bc8b6815386e7e15f6863e1a975e",
      "old_mode": 33188,
      "old_path": "iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp",
      "new_id": "62613a5a8a822ffd8bf4f462eba2501cf5ad31b6",
      "new_mode": 33188,
      "new_path": "iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp"
    },
    {
      "type": "modify",
      "old_id": "f456b71fb3b5e8e2e9bb7de686aaf31d0fe22f7a",
      "old_mode": 33188,
      "old_path": "iree/compiler/Utils/CustomKernelsTargetInfo.h",
      "new_id": "375f338e3ff8bb6af197fc3b17717a3379ddc12c",
      "new_mode": 33188,
      "new_path": "iree/compiler/Utils/CustomKernelsTargetInfo.h"
    }
  ]
}
