Microkernels: add arm64 bitcode. Test everywhere. (#13846)
This adds arm64 to the ukernels bitcode build and, following #13825,
that bitcode is automatically picked up by `iree-compile` when the target is
arm64.
This generalizes the data-tiling + microkernels e2e matmul tests in the BUILD
files to cover the relevant cases on both x86-64 and arm64.
This drops the `hostonly` and `x86_64_only` tags on that e2e matmul test, so
it is now enabled everywhere. On architectures for which we don't have
dedicated fast code, it just uses the generic (not fast) bitcode.
Fixes #13804.
diff --git a/build_tools/bazel/iree_bitcode_library.bzl b/build_tools/bazel/iree_bitcode_library.bzl
index 0a01299..89ad587 100644
--- a/build_tools/bazel/iree_bitcode_library.bzl
+++ b/build_tools/bazel/iree_bitcode_library.bzl
@@ -89,6 +89,9 @@
# This must match what the runtime is built with.
"-fno-short-wchar",
+ # Enable inline asm.
+ "-fasm",
+
# Object file only in bitcode format:
"-c",
"-emit-llvm",
diff --git a/build_tools/cmake/iree_bitcode_library.cmake b/build_tools/cmake/iree_bitcode_library.cmake
index e6d2ba3..6b175f0 100644
--- a/build_tools/cmake/iree_bitcode_library.cmake
+++ b/build_tools/cmake/iree_bitcode_library.cmake
@@ -65,6 +65,9 @@
# This must match what the runtime is built with.
"-fno-short-wchar"
+ # Enable inline asm.
+ "-fasm"
+
# Object file only in bitcode format:
"-c"
"-emit-llvm"
diff --git a/runtime/src/iree/builtins/ukernel/BUILD.bazel b/runtime/src/iree/builtins/ukernel/BUILD.bazel
index 2dedda2..c3fbce3 100644
--- a/runtime/src/iree/builtins/ukernel/BUILD.bazel
+++ b/runtime/src/iree/builtins/ukernel/BUILD.bazel
@@ -136,6 +136,7 @@
srcs = [
":ukernel_bitcode_32bit_base.bc",
":ukernel_bitcode_64bit_base.bc",
+ "//runtime/src/iree/builtins/ukernel/arch/arm_64:ukernel_bitcode_arm_64.bc",
"//runtime/src/iree/builtins/ukernel/arch/x86_64:ukernel_bitcode_x86_64.bc",
],
c_file_output = "ukernel_bitcode.c",
diff --git a/runtime/src/iree/builtins/ukernel/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
index 73cad50..bd5133f 100644
--- a/runtime/src/iree/builtins/ukernel/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
@@ -140,6 +140,7 @@
NAME
embed_ukernel_bitcode
SRCS
+ "runtime/src/iree/builtins/ukernel/arch/arm_64/ukernel_bitcode_arm_64.bc"
"runtime/src/iree/builtins/ukernel/arch/x86_64/ukernel_bitcode_x86_64.bc"
"ukernel_bitcode_32bit_base.bc"
"ukernel_bitcode_64bit_base.bc"
diff --git a/runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD.bazel b/runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD.bazel
index b77d295..ccd24a7 100644
--- a/runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD.bazel
+++ b/runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD.bazel
@@ -4,7 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library")
+load("//build_tools/bazel:build_defs.oss.bzl", "iree_cmake_extra_content")
+load("//build_tools/bazel:iree_bitcode_library.bzl", "iree_bitcode_library", "iree_link_bitcode")
package(
default_visibility = ["//visibility:public"],
@@ -12,34 +13,75 @@
licenses = ["notice"], # Apache 2.0
)
-iree_runtime_cc_library(
- name = "mmt4d_arm_64",
- hdrs = [
- "mmt4d_arm_64.h",
- ],
- deps = ["//runtime/src/iree/builtins/ukernel:internal_headers"],
+#===------------------------------------------------------------------------===#
+# UKernel bitcode files
+#===------------------------------------------------------------------------===#
+
+iree_cmake_extra_content(
+ content = """
+if(IREE_BUILD_COMPILER AND IREE_TARGET_BACKEND_LLVM_CPU)
+""",
+ inline = True,
)
-iree_runtime_cc_library(
- name = "pack_arm_64",
- hdrs = [
- "pack_arm_64.h",
+filegroup(
+ name = "bitcode_internal_headers",
+ srcs = [
+ "//runtime/src/iree/builtins/ukernel/arch/arm_64:common_arm_64.h",
],
- deps = ["//runtime/src/iree/builtins/ukernel:internal_headers"],
)
-iree_runtime_cc_library(
- name = "query_tile_sizes_arm_64",
- hdrs = [
- "query_tile_sizes_arm_64.h",
+UKERNEL_ARM_64_BASE_SRCS = [
+ "mmt4d_arm_64.c",
+ "pack_arm_64.c",
+ "query_tile_sizes_arm_64.c",
+ "unpack_arm_64.c",
+]
+
+iree_bitcode_library(
+ name = "ukernel_bitcode_arm_64_base",
+ srcs = UKERNEL_ARM_64_BASE_SRCS,
+ arch = "arm_64",
+ internal_hdrs = [
+ ":bitcode_internal_headers",
+ "//runtime/src/iree/builtins/ukernel:bitcode_internal_headers",
],
- deps = ["//runtime/src/iree/builtins/ukernel:internal_headers"],
)
-iree_runtime_cc_library(
- name = "unpack_arm_64",
- hdrs = [
- "unpack_arm_64.h",
+iree_bitcode_library(
+ name = "ukernel_bitcode_arm_64_dotprod",
+ srcs = ["mmt4d_arm_64_dotprod.c"],
+ arch = "arm_64",
+ copts = ["-march=armv8.2-a+dotprod"],
+ internal_hdrs = [
+ ":bitcode_internal_headers",
+ "//runtime/src/iree/builtins/ukernel:bitcode_internal_headers",
],
- deps = ["//runtime/src/iree/builtins/ukernel:internal_headers"],
+)
+
+iree_bitcode_library(
+ name = "ukernel_bitcode_arm_64_i8mm",
+ srcs = ["mmt4d_arm_64_i8mm.c"],
+ arch = "arm_64",
+ copts = ["-march=armv8.2-a+i8mm"],
+ internal_hdrs = [
+ ":bitcode_internal_headers",
+ "//runtime/src/iree/builtins/ukernel:bitcode_internal_headers",
+ ],
+)
+
+iree_link_bitcode(
+ name = "ukernel_bitcode_arm_64",
+ bitcode_files = [
+ "ukernel_bitcode_arm_64_base.bc",
+ "ukernel_bitcode_arm_64_dotprod.bc",
+ "ukernel_bitcode_arm_64_i8mm.bc",
+ ],
+)
+
+iree_cmake_extra_content(
+ content = """
+endif() # IREE_BUILD_COMPILER AND IREE_TARGET_BACKEND_LLVM_CPU
+""",
+ inline = True,
)
diff --git a/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
index 3af5693..f876c12 100644
--- a/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
@@ -1,8 +1,64 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
+# runtime/src/iree/builtins/ukernel/arch/arm_64/BUILD.bazel #
+# #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
+# CMake-only content. #
+# #
+# To disable autogeneration for this file entirely, delete this header. #
+################################################################################
+
+iree_add_all_subdirs()
+
+if(IREE_BUILD_COMPILER AND IREE_TARGET_BACKEND_LLVM_CPU)
+
+iree_bitcode_library(
+ NAME
+ ukernel_bitcode_arm_64_base
+ ARCH
+ arm_64
+ SRCS
+ "mmt4d_arm_64.c"
+ "pack_arm_64.c"
+ "query_tile_sizes_arm_64.c"
+ "unpack_arm_64.c"
+)
+
+iree_bitcode_library(
+ NAME
+ ukernel_bitcode_arm_64_dotprod
+ ARCH
+ arm_64
+ SRCS
+ "mmt4d_arm_64_dotprod.c"
+ COPTS
+ "-march=armv8.2-a+dotprod"
+)
+
+iree_bitcode_library(
+ NAME
+ ukernel_bitcode_arm_64_i8mm
+ ARCH
+ arm_64
+ SRCS
+ "mmt4d_arm_64_i8mm.c"
+ COPTS
+ "-march=armv8.2-a+i8mm"
+)
+
+iree_link_bitcode(
+ NAME
+ ukernel_bitcode_arm_64
+ SRCS
+ "ukernel_bitcode_arm_64_base.bc"
+ "ukernel_bitcode_arm_64_dotprod.bc"
+ "ukernel_bitcode_arm_64_i8mm.bc"
+
+)
+
+endif() # IREE_BUILD_COMPILER AND IREE_TARGET_BACKEND_LLVM_CPU
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
if (NOT (IREE_ARCH STREQUAL "arm_64"))
return()
diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel
index ac9975c..9381459 100644
--- a/tests/e2e/matmul/BUILD.bazel
+++ b/tests/e2e/matmul/BUILD.bazel
@@ -140,7 +140,7 @@
# Test VMVX+ukernel, mmt4d, with target CPU features variants relevant to each
# lhs_rhs_type.
[iree_generated_trace_runner_test(
- name = "e2e_matmul_mmt4d_%s_small_ukernel" % lhs_rhs_type,
+ name = "e2e_matmul_mmt4d_%s_small_vmvx_ukernel" % lhs_rhs_type,
compiler_flags = [
"--iree-vmvx-enable-microkernels",
"--iree-flow-enable-data-tiling",
@@ -159,10 +159,27 @@
"f32",
]]
-# Test x86_64+ukernel, mmt4d, with target CPU features variants relevant to each
-# lhs_rhs_type.
+X86_64_AVX2_FMA = [
+ "+avx",
+ "+avx2",
+ "+fma",
+]
+
+X86_64_AVX512_BASE = X86_64_AVX2_FMA + [
+ "+avx512f",
+ "+avx512vl",
+ "+avx512cd",
+ "+avx512bw",
+ "+avx512dq",
+]
+
+X86_64_AVX512_VNNI = X86_64_AVX512_BASE + [
+ "+avx512vnni",
+]
+
+# Test mmt4d with --iree-llvmcpu-enable-microkernels.
[iree_generated_trace_runner_test(
- name = "e2e_matmul_mmt4d_%s_small_ukernel_x86" % lhs_rhs_type,
+ name = "e2e_matmul_mmt4d_%s_%s_ukernel" % (lhs_rhs_type, size),
compiler_flags = [
"--iree-llvmcpu-enable-microkernels",
"--iree-flow-enable-data-tiling",
@@ -170,23 +187,27 @@
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
- "--shapes=small",
- ],
- tags = [
- "hostonly",
- "x86_64_only",
+ "--shapes=%s" % size,
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = [
"default",
- "x86_64:+avx,+avx2,+fma",
- ],
+ "x86_64:" + ",".join(X86_64_AVX2_FMA),
+ "x86_64:" + ",".join(X86_64_AVX512_BASE),
+ ] + ([
+ "x86_64:" + ",".join(X86_64_AVX512_VNNI),
+ "arm_64:+dotprod",
+ "arm_64:+i8mm",
+ ] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
+] for size in [
+ "small",
+ "large",
]]
[iree_generated_trace_runner_test(
diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt
index 8546fae..f6bb508 100644
--- a/tests/e2e/matmul/CMakeLists.txt
+++ b/tests/e2e/matmul/CMakeLists.txt
@@ -212,7 +212,7 @@
iree_generated_trace_runner_test(
NAME
- e2e_matmul_mmt4d_i8_small_ukernel
+ e2e_matmul_mmt4d_i8_small_vmvx_ukernel
GENERATOR
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
@@ -231,6 +231,77 @@
iree_generated_trace_runner_test(
NAME
+ e2e_matmul_mmt4d_f32_small_vmvx_ukernel
+ GENERATOR
+ "generate_e2e_matmul_tests.py"
+ GENERATOR_ARGS
+ "--lhs_rhs_type=f32"
+ "--shapes=small"
+ TRACE_RUNNER
+ iree-e2e-matmul-test
+ TARGET_BACKENDS
+ "vmvx"
+ DRIVERS
+ "local-task"
+ COMPILER_FLAGS
+ "--iree-vmvx-enable-microkernels"
+ "--iree-flow-enable-data-tiling"
+)
+
+iree_generated_trace_runner_test(
+ NAME
+ e2e_matmul_mmt4d_i8_small_ukernel
+ GENERATOR
+ "generate_e2e_matmul_tests.py"
+ GENERATOR_ARGS
+ "--lhs_rhs_type=i8"
+ "--shapes=small"
+ TRACE_RUNNER
+ iree-e2e-matmul-test
+ TARGET_BACKENDS
+ "llvm-cpu"
+ DRIVERS
+ "local-task"
+ COMPILER_FLAGS
+ "--iree-llvmcpu-enable-microkernels"
+ "--iree-flow-enable-data-tiling"
+ TARGET_CPU_FEATURES_VARIANTS
+ "default"
+ "x86_64:+avx,+avx2,+fma"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
+ "arm_64:+dotprod"
+ "arm_64:+i8mm"
+)
+
+iree_generated_trace_runner_test(
+ NAME
+ e2e_matmul_mmt4d_i8_large_ukernel
+ GENERATOR
+ "generate_e2e_matmul_tests.py"
+ GENERATOR_ARGS
+ "--lhs_rhs_type=i8"
+ "--shapes=large"
+ TRACE_RUNNER
+ iree-e2e-matmul-test
+ TARGET_BACKENDS
+ "llvm-cpu"
+ DRIVERS
+ "local-task"
+ COMPILER_FLAGS
+ "--iree-llvmcpu-enable-microkernels"
+ "--iree-flow-enable-data-tiling"
+ TARGET_CPU_FEATURES_VARIANTS
+ "default"
+ "x86_64:+avx,+avx2,+fma"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni"
+ "arm_64:+dotprod"
+ "arm_64:+i8mm"
+)
+
+iree_generated_trace_runner_test(
+ NAME
e2e_matmul_mmt4d_f32_small_ukernel
GENERATOR
"generate_e2e_matmul_tests.py"
@@ -240,47 +311,26 @@
TRACE_RUNNER
iree-e2e-matmul-test
TARGET_BACKENDS
- "vmvx"
- DRIVERS
- "local-task"
- COMPILER_FLAGS
- "--iree-vmvx-enable-microkernels"
- "--iree-flow-enable-data-tiling"
-)
-
-iree_generated_trace_runner_test(
- NAME
- e2e_matmul_mmt4d_i8_small_ukernel_x86
- GENERATOR
- "generate_e2e_matmul_tests.py"
- GENERATOR_ARGS
- "--lhs_rhs_type=i8"
- "--shapes=small"
- TRACE_RUNNER
- iree-e2e-matmul-test
- TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
- LABELS
- "hostonly"
- "x86_64_only"
TARGET_CPU_FEATURES_VARIANTS
"default"
"x86_64:+avx,+avx2,+fma"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
)
iree_generated_trace_runner_test(
NAME
- e2e_matmul_mmt4d_f32_small_ukernel_x86
+ e2e_matmul_mmt4d_f32_large_ukernel
GENERATOR
"generate_e2e_matmul_tests.py"
GENERATOR_ARGS
"--lhs_rhs_type=f32"
- "--shapes=small"
+ "--shapes=large"
TRACE_RUNNER
iree-e2e-matmul-test
TARGET_BACKENDS
@@ -290,12 +340,10 @@
COMPILER_FLAGS
"--iree-llvmcpu-enable-microkernels"
"--iree-flow-enable-data-tiling"
- LABELS
- "hostonly"
- "x86_64_only"
TARGET_CPU_FEATURES_VARIANTS
"default"
"x86_64:+avx,+avx2,+fma"
+ "x86_64:+avx,+avx2,+fma,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq"
)
iree_generated_trace_runner_test(