CPU ukernels as bitcode (x86-only for now) (#13460)
While #13433 enabled the use of micro kernels within codegen backends
using the plugin mechanism, here the ukernel code is compiled into a
bitcode library. This bitcode library is linked with the generated code
at compilation time. The lowering to LLVM inlines the exact micro kernel
needed from the micro kernel library for a particular architecture. To
enable this end-to-end flow, the following changes are needed
- Add an enum attribute to HAL : `IREE::HAL::CallingConvention` that
allows specifying what calling convention to use for the micro kernel
call. `Default` leaves the params as is, `ParameterStruct` packs all the
returns and arguments into a parameter struct to mimic ABIs like
https://github.com/openxla/iree/blob/6cf092d022810d4347353b23e5ce2688a166dd67/runtime/src/iree/builtins/ukernel/mmt4d.h#L16
- Couple of patterns are added to `ConvertToLLVM` pass, to handle the
lowering of the function definition and function call, in keeping with
the specified ABI
- Allow specification of `hal.import.fields` to specify `processor_data`
and `processor_id` on ukernel function defn. This then generates the
code to forward this information to the microkernels (similar to what is
done for external calls using the plugin mechanism)
- Propagate the target CPU features in `hal.executable.target` of the
dispatch into the micro kernel call. This allows the LLVM passes to walk
through the branching used to pick the right micro kernel function and
effectively inline that.
Co-authored-by: Benoit Jacob <benoitjacob@google.com>
diff --git a/build_tools/bazel/iree_bitcode_library.bzl b/build_tools/bazel/iree_bitcode_library.bzl
index a38fa45..b4f9b38 100644
--- a/build_tools/bazel/iree_bitcode_library.bzl
+++ b/build_tools/bazel/iree_bitcode_library.bzl
@@ -40,23 +40,25 @@
for bitcode_src in srcs:
bitcode_out = "%s_%s.bc" % (name, bitcode_src)
bitcode_files.append(bitcode_out)
+ system_headers = ["immintrin.h"]
native.genrule(
name = "gen_%s" % (bitcode_out),
- srcs = [bitcode_src],
+ srcs = [bitcode_src] + hdrs + [builtin_headers_dep],
outs = [bitcode_out],
cmd = " && ".join([
" ".join([
"$(location %s)" % (clang_tool),
- "-isystem $(BINDIR)/%s" % (builtin_headers_path),
+ "-isystem $(BINDIR)/%s" % builtin_headers_path,
" ".join(copts),
" ".join(["-D%s" % (define) for define in defines]),
+ " ".join(["-I $(BINDIR)/runtime/src"]),
+ " ".join(["-I runtime/src"]),
"-o $(location %s)" % (bitcode_out),
"$(location %s)" % (bitcode_src),
]),
]),
- tools = hdrs + data + [
+ tools = data + [
clang_tool,
- builtin_headers_dep,
],
message = "Compiling %s to %s..." % (bitcode_src, bitcode_out),
output_to_bindir = 1,
@@ -81,3 +83,40 @@
output_to_bindir = 1,
**kwargs
)
+
+def iree_link_bitcode(
+ name,
+ bitcode_files,
+ out = None,
+ link_tool = "@llvm-project//llvm:llvm-link",
+ **kwargs):
+ """Builds an LLVM bitcode library from an input file via clang.
+
+ Args:
+ name: Name of the target.
+ bitcode_files: bitcode files to link together.
+ out: output file name (defaults to name.bc).
+ link_tool: llvm-link tool used for linking bitcode files.
+ **kwargs: any additional attributes to pass to the underlying rules.
+ """
+
+ bitcode_files_qualified = [(("//" + native.package_name() + "/" + b) if b.count(":") else b) for b in bitcode_files]
+
+ if not out:
+ out = "%s.bc" % (name)
+ native.genrule(
+ name = name,
+ srcs = bitcode_files_qualified,
+ outs = [out],
+ cmd = " && ".join([
+ " ".join([
+ "$(location %s)" % (link_tool),
+ "-o $(location %s)" % (out),
+ " ".join(["$(locations %s)" % (src) for src in bitcode_files_qualified]),
+ ]),
+ ]),
+ tools = [link_tool],
+ message = "Linking bitcode library %s to %s..." % (name, out),
+ output_to_bindir = 1,
+ **kwargs
+ )
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
index 4cbc856..d10af72 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
@@ -515,6 +515,20 @@
f"{testonly_block}"
f" PUBLIC\n)\n\n")
+ def iree_link_bitcode(self, name, bitcode_files, data=None, testonly=None):
+ name_block = self._convert_string_arg_block("NAME", name, quote=False)
+ bitcode_files_block = self._convert_srcs_block(
+ [f.replace(":", "/") for f in bitcode_files])
+ data_block = self._convert_target_list_block("DATA", data)
+ testonly_block = self._convert_option_block("TESTONLY", testonly)
+
+ self._converter.body += (f"iree_link_bitcode(\n"
+ f"{name_block}"
+ f"{bitcode_files_block}"
+ f"{data_block}"
+ f"{testonly_block}"
+ f" PUBLIC\n)\n\n")
+
def iree_bytecode_module(self,
name,
src,
diff --git a/build_tools/cmake/iree_bitcode_library.cmake b/build_tools/cmake/iree_bitcode_library.cmake
index 0f17151..05ee273 100644
--- a/build_tools/cmake/iree_bitcode_library.cmake
+++ b/build_tools/cmake/iree_bitcode_library.cmake
@@ -34,10 +34,6 @@
set(_CLANG_TOOL "$<TARGET_FILE:${IREE_CLANG_TARGET}>")
set(_LINK_TOOL "$<TARGET_FILE:${IREE_LLVM_LINK_TARGET}>")
- # These are copied as part of the clang build; we could allow the user to
- # override this but it should be harmless.
- set(_BUILTIN_HEADERS_PATH "${IREE_BINARY_DIR}/third_party/llvm-project/llvm/lib/clang/${CLANG_VERSION_MAJOR}/include/")
-
if(_RULE_TESTONLY AND NOT IREE_BUILD_TESTS)
return()
endif()
@@ -48,7 +44,21 @@
set(_OUT "${_RULE_NAME}.bc")
endif()
- set(_ARGS "-isystem ${_BUILTIN_HEADERS_PATH}")
+ # We need CLANG_VERSION_MAJOR to set up include directories. Unfortunately,
+ # Clang's own CMakeLists do not expose CLANG_VERSION_MAJOR to PARENT_SCOPE.
+ # Likewise with LLVM_VERSION_MAJOR. However, CLANG_EXECUTABLE_VERSION is
+ # CACHE'd, so we can access it, and it currently has the same value.
+ set(_CLANG_VERSION_MAJOR "${CLANG_EXECUTABLE_VERSION}")
+
+ # These are copied as part of the clang build; we could allow the user to
+ # override this but it should be harmless.
+ set(_BUILTIN_HEADERS_PATH "${IREE_BINARY_DIR}/llvm-project/lib/clang/${_CLANG_VERSION_MAJOR}/include/")
+
+ set(_ARGS "")
+ list(APPEND _ARGS "-isystem" "${_BUILTIN_HEADERS_PATH}")
+ list(APPEND _ARGS "-I" "${IREE_SOURCE_DIR}/runtime/src")
+ list(APPEND _ARGS "-I" "${IREE_BINARY_DIR}/runtime/src")
+
list(APPEND _ARGS "${_RULE_COPTS}")
foreach(_DEFINE ${_RULE_DEFINES})
list(APPEND _ARGS "-D${_DEFINE}")
@@ -70,6 +80,7 @@
"${_BITCODE_FILE}"
DEPENDS
${_CLANG_TOOL}
+ ${_LINK_TOOL}
${_BITCODE_SRC}
COMMENT
"Compiling ${_BITCODE_SRC} to ${_BITCODE_FILE}"
@@ -101,3 +112,66 @@
DEPENDS "${_OUT}"
)
endfunction()
+
+# iree_link_bitcode()
+#
+# Builds an LLVM bitcode library from an input file via clang
+#
+# Parameters:
+# NAME: Name of target (see Note).
+# SRCS: Source files to pass to clang.
+# HDRS: Additional headers included by the source files.
+# COPTS: additional flags to pass to clang.
+# DEFINES: Preprocessor definitions to pass to clang.
+# DATA: Additional data required during compilation.
+# OUT: Output file name (defaults to NAME.bc).
+# PUBLIC: Add this so that this library will be exported under ${PACKAGE}::
+# Also in IDE, target will appear in ${PACKAGE} folder while non PUBLIC
+# will be in ${PACKAGE}/internal.
+# TESTONLY: When added, this target will only be built if IREE_BUILD_TESTS=ON.
+function(iree_link_bitcode)
+ cmake_parse_arguments(
+ _RULE
+ "PUBLIC;TESTONLY"
+ "NAME;OUT"
+ "SRCS;DEFINES;DATA"
+ ${ARGN}
+ )
+
+ set(_LINK_TOOL "$<TARGET_FILE:${IREE_LLVM_LINK_TARGET}>")
+
+ if(_RULE_TESTONLY AND NOT IREE_BUILD_TESTS)
+ return()
+ endif()
+
+ if(DEFINED _RULE_OUT)
+ set(_OUT "${_RULE_OUT}")
+ else()
+ set(_OUT "${_RULE_NAME}.bc")
+ endif()
+
+ set(_BITCODE_FILES "${_RULE_SRCS}")
+
+ add_custom_command(
+ OUTPUT
+ ${_OUT}
+ COMMAND
+ ${_LINK_TOOL}
+ ${_BITCODE_FILES}
+ "-o"
+ "${_OUT}"
+ DEPENDS
+ ${_LINK_TOOL}
+ ${_BITCODE_FILES}
+ COMMENT
+ "Linking bitcode to ${_OUT}"
+ VERBATIM
+ )
+
+ # Only add iree_${NAME} as custom target doesn't support aliasing to
+ # iree::${NAME}.
+ iree_package_name(_PACKAGE_NAME)
+ add_custom_target("${_PACKAGE_NAME}_${_RULE_NAME}"
+ DEPENDS "${_OUT}"
+ )
+endfunction()
diff --git a/build_tools/cmake/iree_trace_runner_test.cmake b/build_tools/cmake/iree_trace_runner_test.cmake
index 96ef411..eff4d41 100644
--- a/build_tools/cmake/iree_trace_runner_test.cmake
+++ b/build_tools/cmake/iree_trace_runner_test.cmake
@@ -47,6 +47,10 @@
${ARGN}
)
+ if(CMAKE_CROSSCOMPILING AND "hostonly" IN_LIST _RULE_LABELS)
+ return()
+ endif()
+
iree_package_name(_PACKAGE_NAME)
set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}")
@@ -149,6 +153,10 @@
${ARGN}
)
+ if(CMAKE_CROSSCOMPILING AND "hostonly" IN_LIST _RULE_LABELS)
+ return()
+ endif()
+
# Omit tests for which the specified driver or target backend is not enabled.
# This overlaps with directory exclusions and other filtering mechanisms.
string(TOUPPER ${_RULE_DRIVER} _UPPERCASE_DRIVER)
@@ -293,6 +301,10 @@
${ARGN}
)
+ if(CMAKE_CROSSCOMPILING AND "hostonly" IN_LIST _RULE_LABELS)
+ return()
+ endif()
+
if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS)
set(_RULE_TARGET_BACKENDS "vmvx" "vulkan-spirv" "llvm-cpu")
set(_RULE_DRIVERS "local-task" "vulkan" "local-task")